diff --git "a/nncf_output.log" "b/nncf_output.log" new file mode 100644--- /dev/null +++ "b/nncf_output.log" @@ -0,0 +1,33188 @@ +INFO:nncf:Ignored adding weight quantizer for: BertForSequenceClassification/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[word_embeddings]/embedding_0 +INFO:nncf:Ignored adding weight quantizer for: BertForSequenceClassification/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[token_type_embeddings]/embedding_0 +INFO:nncf:Ignored adding weight quantizer for: BertForSequenceClassification/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[position_embeddings]/embedding_0 +INFO:nncf:Not adding activation input quantizer for operation: 4 BertForSequenceClassification/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[word_embeddings]/embedding_0 +INFO:nncf:Not adding activation input quantizer for operation: 5 BertForSequenceClassification/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[token_type_embeddings]/embedding_0 +INFO:nncf:Not adding activation input quantizer for operation: 6 BertForSequenceClassification/BertModel[bert]/BertEmbeddings[embeddings]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 8 BertForSequenceClassification/BertModel[bert]/BertEmbeddings[embeddings]/__iadd___0 +INFO:nncf:Not adding activation input quantizer for operation: 9 BertForSequenceClassification/BertModel[bert]/BertEmbeddings[embeddings]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 10 BertForSequenceClassification/BertModel[bert]/BertEmbeddings[embeddings]/Dropout[dropout]/dropout_0 +INFO:nncf:Not adding activation input quantizer for operation: 23 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 26 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfAttention[self]/matmul_1 +INFO:nncf:Not adding activation input quantizer for operation: 32 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 33 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertAttention[attention]/BertSelfOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 38 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 39 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 52 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 55 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfAttention[self]/matmul_1 +INFO:nncf:Not adding activation input quantizer for operation: 61 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 62 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertAttention[attention]/BertSelfOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 67 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 68 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 81 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 84 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfAttention[self]/matmul_1 +INFO:nncf:Not adding activation input quantizer for operation: 90 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 91 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertAttention[attention]/BertSelfOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 96 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 97 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 110 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 113 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfAttention[self]/matmul_1 +INFO:nncf:Not adding activation input quantizer for operation: 119 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 120 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertAttention[attention]/BertSelfOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 125 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 126 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 139 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 142 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfAttention[self]/matmul_1 +INFO:nncf:Not adding activation input quantizer for operation: 148 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 149 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertAttention[attention]/BertSelfOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 154 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 155 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 168 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 171 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfAttention[self]/matmul_1 +INFO:nncf:Not adding activation input quantizer for operation: 177 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 178 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertAttention[attention]/BertSelfOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 183 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 184 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 197 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 200 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfAttention[self]/matmul_1 +INFO:nncf:Not adding activation input quantizer for operation: 206 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 207 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertAttention[attention]/BertSelfOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 212 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 213 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 226 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 229 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfAttention[self]/matmul_1 +INFO:nncf:Not adding activation input quantizer for operation: 235 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 236 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertAttention[attention]/BertSelfOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 241 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 242 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 255 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 258 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfAttention[self]/matmul_1 +INFO:nncf:Not adding activation input quantizer for operation: 264 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 265 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertAttention[attention]/BertSelfOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 270 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 271 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 284 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 287 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfAttention[self]/matmul_1 +INFO:nncf:Not adding activation input quantizer for operation: 293 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 294 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertAttention[attention]/BertSelfOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 299 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 300 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 313 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 316 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfAttention[self]/matmul_1 +INFO:nncf:Not adding activation input quantizer for operation: 322 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 323 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertAttention[attention]/BertSelfOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 328 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 329 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 342 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 345 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfAttention[self]/matmul_1 +INFO:nncf:Not adding activation input quantizer for operation: 351 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 352 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertAttention[attention]/BertSelfOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Not adding activation input quantizer for operation: 357 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/__add___0 +INFO:nncf:Not adding activation input quantizer for operation: 358 BertForSequenceClassification/BertModel[bert]/BertEncoder[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[output]/NNCFLayerNorm[LayerNorm]/layer_norm_0 +INFO:nncf:Collecting tensor statistics |█ | 1 / 10 +INFO:nncf:Collecting tensor statistics |███ | 2 / 10 +INFO:nncf:Collecting tensor statistics |████ | 3 / 10 +INFO:nncf:Collecting tensor statistics |██████ | 4 / 10 +INFO:nncf:Collecting tensor statistics |████████ | 5 / 10 +INFO:nncf:Collecting tensor statistics |█████████ | 6 / 10 +INFO:nncf:Collecting tensor statistics |███████████ | 7 / 10 +INFO:nncf:Collecting tensor statistics |████████████ | 8 / 10 +INFO:nncf:Collecting tensor statistics |██████████████ | 9 / 10 +INFO:nncf:Collecting tensor statistics |████████████████| 10 / 10 +INFO:nncf:Ignored adding weight sparsifier for operation: BertForSequenceClassification/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[word_embeddings]/embedding_0 +INFO:nncf:Ignored adding weight sparsifier for operation: BertForSequenceClassification/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[position_embeddings]/embedding_0 +INFO:nncf:Ignored adding weight sparsifier for operation: BertForSequenceClassification/BertModel[bert]/BertEmbeddings[embeddings]/NNCFEmbedding[token_type_embeddings]/embedding_0 +INFO:nncf:Ignored adding weight sparsifier for operation: BertForSequenceClassification/BertModel[bert]/BertPooler[pooler]/NNCFLinear[dense]/linear_0 +INFO:nncf:Ignored adding weight sparsifier for operation: BertForSequenceClassification/NNCFLinear[classifier]/linear_0 +INFO:nncf:Compiling and loading torch extension: quantized_functions_cuda... +INFO:nncf:Finished loading torch extension: quantized_functions_cuda +INFO:nncf:Statistics of the quantization algorithm: +Epoch 0 |+--------------------------------+-------+ +Epoch 0 || Statistic's name | Value | +Epoch 0 |+================================+=======+ +Epoch 0 || Ratio of enabled quantizations | 100 | +Epoch 0 |+--------------------------------+-------+ +Epoch 0 | +Epoch 0 |Statistics of the quantization share: +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Statistic's name | Value | +Epoch 0 |+==================================+====================+ +Epoch 0 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 0 |+----------------------------------+--------------------+ +Epoch 0 | +Epoch 0 |Statistics of the bitwidth distribution: +Epoch 0 |+--------------+---------------------+--------------------+--------------------+ +Epoch 0 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 0 || | WQs | Placed AQs | Qs | +Epoch 0 |+==============+=====================+====================+====================+ +Epoch 0 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 0 || | | | 173) | +Epoch 0 |+--------------+---------------------+--------------------+--------------------+ +Epoch 0 | +Epoch 0 |Statistics of the sparsified model: +Epoch 0 |+-----------------------------------------+-------+ +Epoch 0 || Statistic's name | Value | +Epoch 0 |+=========================================+=======+ +Epoch 0 || Sparsity level of the whole model | 0.000 | +Epoch 0 |+-----------------------------------------+-------+ +Epoch 0 || Sparsity level of all sparsified layers | 0.000 | +Epoch 0 |+-----------------------------------------+-------+ +Epoch 0 | +Epoch 0 |Statistics by sparsified layers: +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 0 |+======================+================+================+=====================+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[0]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[qu | | | | +Epoch 0 || ery]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[0]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[ke | | | | +Epoch 0 || y]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[0]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[va | | | | +Epoch 0 || lue]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[0]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfOutput[ou | | | | +Epoch 0 || tput]/NNCFLinear[den | | | | +Epoch 0 || se]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [3072, 768] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[0]/Be | | | | +Epoch 0 || rtIntermediate[inter | | | | +Epoch 0 || mediate]/NNCFLinear[ | | | | +Epoch 0 || dense]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 3072] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[0]/Be | | | | +Epoch 0 || rtOutput[output]/NNC | | | | +Epoch 0 || FLinear[dense]/linea | | | | +Epoch 0 || r_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[1]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[qu | | | | +Epoch 0 || ery]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[1]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[ke | | | | +Epoch 0 || y]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[1]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[va | | | | +Epoch 0 || lue]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[1]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfOutput[ou | | | | +Epoch 0 || tput]/NNCFLinear[den | | | | +Epoch 0 || se]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [3072, 768] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[1]/Be | | | | +Epoch 0 || rtIntermediate[inter | | | | +Epoch 0 || mediate]/NNCFLinear[ | | | | +Epoch 0 || dense]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 3072] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[1]/Be | | | | +Epoch 0 || rtOutput[output]/NNC | | | | +Epoch 0 || FLinear[dense]/linea | | | | +Epoch 0 || r_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[2]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[qu | | | | +Epoch 0 || ery]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[2]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[ke | | | | +Epoch 0 || y]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[2]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[va | | | | +Epoch 0 || lue]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[2]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfOutput[ou | | | | +Epoch 0 || tput]/NNCFLinear[den | | | | +Epoch 0 || se]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [3072, 768] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[2]/Be | | | | +Epoch 0 || rtIntermediate[inter | | | | +Epoch 0 || mediate]/NNCFLinear[ | | | | +Epoch 0 || dense]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 3072] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[2]/Be | | | | +Epoch 0 || rtOutput[output]/NNC | | | | +Epoch 0 || FLinear[dense]/linea | | | | +Epoch 0 || r_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[3]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[qu | | | | +Epoch 0 || ery]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[3]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[ke | | | | +Epoch 0 || y]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[3]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[va | | | | +Epoch 0 || lue]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[3]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfOutput[ou | | | | +Epoch 0 || tput]/NNCFLinear[den | | | | +Epoch 0 || se]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [3072, 768] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[3]/Be | | | | +Epoch 0 || rtIntermediate[inter | | | | +Epoch 0 || mediate]/NNCFLinear[ | | | | +Epoch 0 || dense]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 3072] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[3]/Be | | | | +Epoch 0 || rtOutput[output]/NNC | | | | +Epoch 0 || FLinear[dense]/linea | | | | +Epoch 0 || r_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[4]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[qu | | | | +Epoch 0 || ery]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[4]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[ke | | | | +Epoch 0 || y]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[4]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[va | | | | +Epoch 0 || lue]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[4]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfOutput[ou | | | | +Epoch 0 || tput]/NNCFLinear[den | | | | +Epoch 0 || se]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [3072, 768] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[4]/Be | | | | +Epoch 0 || rtIntermediate[inter | | | | +Epoch 0 || mediate]/NNCFLinear[ | | | | +Epoch 0 || dense]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 3072] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[4]/Be | | | | +Epoch 0 || rtOutput[output]/NNC | | | | +Epoch 0 || FLinear[dense]/linea | | | | +Epoch 0 || r_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[5]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[qu | | | | +Epoch 0 || ery]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[5]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[ke | | | | +Epoch 0 || y]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[5]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[va | | | | +Epoch 0 || lue]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[5]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfOutput[ou | | | | +Epoch 0 || tput]/NNCFLinear[den | | | | +Epoch 0 || se]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [3072, 768] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[5]/Be | | | | +Epoch 0 || rtIntermediate[inter | | | | +Epoch 0 || mediate]/NNCFLinear[ | | | | +Epoch 0 || dense]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 3072] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[5]/Be | | | | +Epoch 0 || rtOutput[output]/NNC | | | | +Epoch 0 || FLinear[dense]/linea | | | | +Epoch 0 || r_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[6]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[qu | | | | +Epoch 0 || ery]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[6]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[ke | | | | +Epoch 0 || y]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[6]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[va | | | | +Epoch 0 || lue]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[6]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfOutput[ou | | | | +Epoch 0 || tput]/NNCFLinear[den | | | | +Epoch 0 || se]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [3072, 768] | 0.000 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[6]/Be | | | | +Epoch 0 || rtIntermediate[inter | | | | +Epoch 0 || mediate]/NNCFLinear[ | | | | +Epoch 0 || dense]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 3072] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[6]/Be | | | | +Epoch 0 || rtOutput[output]/NNC | | | | +Epoch 0 || FLinear[dense]/linea | | | | +Epoch 0 || r_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[7]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[qu | | | | +Epoch 0 || ery]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[7]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[ke | | | | +Epoch 0 || y]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[7]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[va | | | | +Epoch 0 || lue]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[7]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfOutput[ou | | | | +Epoch 0 || tput]/NNCFLinear[den | | | | +Epoch 0 || se]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [3072, 768] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[7]/Be | | | | +Epoch 0 || rtIntermediate[inter | | | | +Epoch 0 || mediate]/NNCFLinear[ | | | | +Epoch 0 || dense]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 3072] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[7]/Be | | | | +Epoch 0 || rtOutput[output]/NNC | | | | +Epoch 0 || FLinear[dense]/linea | | | | +Epoch 0 || r_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[8]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[qu | | | | +Epoch 0 || ery]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[8]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[ke | | | | +Epoch 0 || y]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[8]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[va | | | | +Epoch 0 || lue]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[8]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfOutput[ou | | | | +Epoch 0 || tput]/NNCFLinear[den | | | | +Epoch 0 || se]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [3072, 768] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[8]/Be | | | | +Epoch 0 || rtIntermediate[inter | | | | +Epoch 0 || mediate]/NNCFLinear[ | | | | +Epoch 0 || dense]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 3072] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[8]/Be | | | | +Epoch 0 || rtOutput[output]/NNC | | | | +Epoch 0 || FLinear[dense]/linea | | | | +Epoch 0 || r_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[9]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[qu | | | | +Epoch 0 || ery]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[9]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[ke | | | | +Epoch 0 || y]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[9]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfAttention | | | | +Epoch 0 || [self]/NNCFLinear[va | | | | +Epoch 0 || lue]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[9]/Be | | | | +Epoch 0 || rtAttention[attentio | | | | +Epoch 0 || n]/BertSelfOutput[ou | | | | +Epoch 0 || tput]/NNCFLinear[den | | | | +Epoch 0 || se]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [3072, 768] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[9]/Be | | | | +Epoch 0 || rtIntermediate[inter | | | | +Epoch 0 || mediate]/NNCFLinear[ | | | | +Epoch 0 || dense]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 3072] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[9]/Be | | | | +Epoch 0 || rtOutput[output]/NNC | | | | +Epoch 0 || FLinear[dense]/linea | | | | +Epoch 0 || r_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[10]/B | | | | +Epoch 0 || ertAttention[attenti | | | | +Epoch 0 || on]/BertSelfAttentio | | | | +Epoch 0 || n[self]/NNCFLinear[q | | | | +Epoch 0 || uery]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[10]/B | | | | +Epoch 0 || ertAttention[attenti | | | | +Epoch 0 || on]/BertSelfAttentio | | | | +Epoch 0 || n[self]/NNCFLinear[k | | | | +Epoch 0 || ey]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[10]/B | | | | +Epoch 0 || ertAttention[attenti | | | | +Epoch 0 || on]/BertSelfAttentio | | | | +Epoch 0 || n[self]/NNCFLinear[v | | | | +Epoch 0 || alue]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[10]/B | | | | +Epoch 0 || ertAttention[attenti | | | | +Epoch 0 || on]/BertSelfOutput[o | | | | +Epoch 0 || utput]/NNCFLinear[de | | | | +Epoch 0 || nse]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [3072, 768] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[10]/B | | | | +Epoch 0 || ertIntermediate[inte | | | | +Epoch 0 || rmediate]/NNCFLinear | | | | +Epoch 0 || [dense]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 3072] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[10]/B | | | | +Epoch 0 || ertOutput[output]/NN | | | | +Epoch 0 || CFLinear[dense]/line | | | | +Epoch 0 || ar_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[11]/B | | | | +Epoch 0 || ertAttention[attenti | | | | +Epoch 0 || on]/BertSelfAttentio | | | | +Epoch 0 || n[self]/NNCFLinear[q | | | | +Epoch 0 || uery]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[11]/B | | | | +Epoch 0 || ertAttention[attenti | | | | +Epoch 0 || on]/BertSelfAttentio | | | | +Epoch 0 || n[self]/NNCFLinear[k | | | | +Epoch 0 || ey]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[11]/B | | | | +Epoch 0 || ertAttention[attenti | | | | +Epoch 0 || on]/BertSelfAttentio | | | | +Epoch 0 || n[self]/NNCFLinear[v | | | | +Epoch 0 || alue]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 768] | 0 | 0.694 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[11]/B | | | | +Epoch 0 || ertAttention[attenti | | | | +Epoch 0 || on]/BertSelfOutput[o | | | | +Epoch 0 || utput]/NNCFLinear[de | | | | +Epoch 0 || nse]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [3072, 768] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[11]/B | | | | +Epoch 0 || ertIntermediate[inte | | | | +Epoch 0 || rmediate]/NNCFLinear | | | | +Epoch 0 || [dense]/linear_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 || BertForSequenceClass | [768, 3072] | 0 | 2.778 | +Epoch 0 || ification/BertModel[ | | | | +Epoch 0 || bert]/BertEncoder[en | | | | +Epoch 0 || coder]/ModuleList[la | | | | +Epoch 0 || yer]/BertLayer[11]/B | | | | +Epoch 0 || ertOutput[output]/NN | | | | +Epoch 0 || CFLinear[dense]/line | | | | +Epoch 0 || ar_0 | | | | +Epoch 0 |+----------------------+----------------+----------------+---------------------+ +Epoch 0 | +Epoch 0 |Statistics of the magnitude sparsity algorithm: +Epoch 0 |+----------------------------------------------------------------------+-------+ +Epoch 0 || Statistic's name | Value | +Epoch 0 |+======================================================================+=======+ +Epoch 0 || A target level of the sparsity for the algorithm for the current | 0 | +Epoch 0 || epoch | | +Epoch 0 |+----------------------------------------------------------------------+-------+ +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || Layer's name | Sparsity threshold | +Epoch 0 |+=========================================================+====================+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 0 || near_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 0 || inear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 0 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 0 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 0 || near_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 0 || inear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 0 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 0 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 0 || near_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 0 || inear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 0 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 0 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 0 || near_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 0 || inear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 0 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 0 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 0 || near_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 0 || inear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 0 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 0 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 0 || near_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 0 || inear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 0 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 0 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 0 || near_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 0 || inear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 0 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 0 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 0 || near_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 0 || inear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 0 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 0 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 0 || near_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 0 || inear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 0 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 0 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 0 || near_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 0 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 0 || inear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 0 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 0 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 0 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 0 || /linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 0 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 0 || inear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 0 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 0 || /linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 0 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 0 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 0 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 0 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 0 || /linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 0 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 0 || inear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 0 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 0 || /linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 0 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 0 || linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 0 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +Epoch 0 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 0 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 0 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 0 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 1 |+--------------------------------+-------+ +Epoch 1 || Statistic's name | Value | +Epoch 1 |+================================+=======+ +Epoch 1 || Ratio of enabled quantizations | 100 | +Epoch 1 |+--------------------------------+-------+ +Epoch 1 | +Epoch 1 |Statistics of the quantization share: +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Statistic's name | Value | +Epoch 1 |+==================================+====================+ +Epoch 1 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 1 |+----------------------------------+--------------------+ +Epoch 1 | +Epoch 1 |Statistics of the bitwidth distribution: +Epoch 1 |+--------------+---------------------+--------------------+--------------------+ +Epoch 1 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 1 || | WQs | Placed AQs | Qs | +Epoch 1 |+==============+=====================+====================+====================+ +Epoch 1 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 1 || | | | 173) | +Epoch 1 |+--------------+---------------------+--------------------+--------------------+ +Epoch 1 | +Epoch 1 |Statistics of the sparsified model: +Epoch 1 |+-----------------------------------------+-------+ +Epoch 1 || Statistic's name | Value | +Epoch 1 |+=========================================+=======+ +Epoch 1 || Sparsity level of the whole model | 0.072 | +Epoch 1 |+-----------------------------------------+-------+ +Epoch 1 || Sparsity level of all sparsified layers | 0.092 | +Epoch 1 |+-----------------------------------------+-------+ +Epoch 1 | +Epoch 1 |Statistics by sparsified layers: +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 1 |+======================+================+================+=====================+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.064 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[0]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.065 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[0]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.075 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[0]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.077 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[0]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0.105 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[0]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0.109 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[0]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.063 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[1]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.063 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[1]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.073 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[1]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.077 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[1]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0.105 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[1]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0.109 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[1]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.064 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[2]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.065 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[2]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.076 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[2]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.079 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[2]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0.105 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[2]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0.109 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[2]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.062 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[3]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.063 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[3]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.072 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[3]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.075 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[3]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0.105 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[3]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0.110 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[3]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.063 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[4]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.063 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[4]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.068 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[4]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.070 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[4]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0.106 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[4]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0.111 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[4]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.062 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[5]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.062 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[5]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.069 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[5]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.071 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[5]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0.106 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[5]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0.110 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[5]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.061 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[6]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.062 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[6]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.069 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[6]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.071 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[6]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0.105 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[6]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0.109 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[6]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.060 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[7]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.061 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[7]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.068 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[7]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.068 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[7]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0.104 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[7]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0.107 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[7]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.059 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[8]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.060 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[8]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.063 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[8]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.064 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[8]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0.103 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[8]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0.105 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[8]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.057 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[9]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[qu | | | | +Epoch 1 || ery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.058 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[9]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[ke | | | | +Epoch 1 || y]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.061 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[9]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfAttention | | | | +Epoch 1 || [self]/NNCFLinear[va | | | | +Epoch 1 || lue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.062 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[9]/Be | | | | +Epoch 1 || rtAttention[attentio | | | | +Epoch 1 || n]/BertSelfOutput[ou | | | | +Epoch 1 || tput]/NNCFLinear[den | | | | +Epoch 1 || se]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0.104 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[9]/Be | | | | +Epoch 1 || rtIntermediate[inter | | | | +Epoch 1 || mediate]/NNCFLinear[ | | | | +Epoch 1 || dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0.106 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[9]/Be | | | | +Epoch 1 || rtOutput[output]/NNC | | | | +Epoch 1 || FLinear[dense]/linea | | | | +Epoch 1 || r_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.057 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[10]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfAttentio | | | | +Epoch 1 || n[self]/NNCFLinear[q | | | | +Epoch 1 || uery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.057 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[10]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfAttentio | | | | +Epoch 1 || n[self]/NNCFLinear[k | | | | +Epoch 1 || ey]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.062 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[10]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfAttentio | | | | +Epoch 1 || n[self]/NNCFLinear[v | | | | +Epoch 1 || alue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.062 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[10]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfOutput[o | | | | +Epoch 1 || utput]/NNCFLinear[de | | | | +Epoch 1 || nse]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0.102 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[10]/B | | | | +Epoch 1 || ertIntermediate[inte | | | | +Epoch 1 || rmediate]/NNCFLinear | | | | +Epoch 1 || [dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0.104 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[10]/B | | | | +Epoch 1 || ertOutput[output]/NN | | | | +Epoch 1 || CFLinear[dense]/line | | | | +Epoch 1 || ar_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.057 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[11]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfAttentio | | | | +Epoch 1 || n[self]/NNCFLinear[q | | | | +Epoch 1 || uery]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.057 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[11]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfAttentio | | | | +Epoch 1 || n[self]/NNCFLinear[k | | | | +Epoch 1 || ey]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.058 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[11]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfAttentio | | | | +Epoch 1 || n[self]/NNCFLinear[v | | | | +Epoch 1 || alue]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 768] | 0.057 | 0.694 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[11]/B | | | | +Epoch 1 || ertAttention[attenti | | | | +Epoch 1 || on]/BertSelfOutput[o | | | | +Epoch 1 || utput]/NNCFLinear[de | | | | +Epoch 1 || nse]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [3072, 768] | 0.100 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[11]/B | | | | +Epoch 1 || ertIntermediate[inte | | | | +Epoch 1 || rmediate]/NNCFLinear | | | | +Epoch 1 || [dense]/linear_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 || BertForSequenceClass | [768, 3072] | 0.100 | 2.778 | +Epoch 1 || ification/BertModel[ | | | | +Epoch 1 || bert]/BertEncoder[en | | | | +Epoch 1 || coder]/ModuleList[la | | | | +Epoch 1 || yer]/BertLayer[11]/B | | | | +Epoch 1 || ertOutput[output]/NN | | | | +Epoch 1 || CFLinear[dense]/line | | | | +Epoch 1 || ar_0 | | | | +Epoch 1 |+----------------------+----------------+----------------+---------------------+ +Epoch 1 | +Epoch 1 |Statistics of the magnitude sparsity algorithm: +Epoch 1 |+----------------------------------------------------------------------+-------+ +Epoch 1 || Statistic's name | Value | +Epoch 1 |+======================================================================+=======+ +Epoch 1 || A target level of the sparsity for the algorithm for the current | 0.092 | +Epoch 1 || epoch | | +Epoch 1 |+----------------------------------------------------------------------+-------+ +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || Layer's name | Sparsity threshold | +Epoch 1 |+=========================================================+====================+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 1 || near_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 1 || inear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 1 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 1 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 1 || near_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 1 || inear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 1 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 1 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 1 || near_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 1 || inear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 1 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 1 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 1 || near_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 1 || inear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 1 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 1 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 1 || near_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 1 || inear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 1 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 1 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 1 || near_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 1 || inear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 1 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 1 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 1 || near_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 1 || inear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 1 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 1 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 1 || near_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 1 || inear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 1 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 1 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 1 || near_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 1 || inear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 1 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 1 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 1 || near_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 1 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 1 || inear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 1 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 1 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 1 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 1 || /linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 1 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 1 || inear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 1 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 1 || /linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 1 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 1 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 1 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 1 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 1 || /linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 1 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 1 || inear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 1 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 1 || /linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 1 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 1 || linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 1 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +Epoch 1 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 1 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 1 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 1 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 2 |+--------------------------------+-------+ +Epoch 2 || Statistic's name | Value | +Epoch 2 |+================================+=======+ +Epoch 2 || Ratio of enabled quantizations | 100 | +Epoch 2 |+--------------------------------+-------+ +Epoch 2 | +Epoch 2 |Statistics of the quantization share: +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Statistic's name | Value | +Epoch 2 |+==================================+====================+ +Epoch 2 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 2 |+----------------------------------+--------------------+ +Epoch 2 | +Epoch 2 |Statistics of the bitwidth distribution: +Epoch 2 |+--------------+---------------------+--------------------+--------------------+ +Epoch 2 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 2 || | WQs | Placed AQs | Qs | +Epoch 2 |+==============+=====================+====================+====================+ +Epoch 2 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 2 || | | | 173) | +Epoch 2 |+--------------+---------------------+--------------------+--------------------+ +Epoch 2 | +Epoch 2 |Statistics of the sparsified model: +Epoch 2 |+-----------------------------------------+-------+ +Epoch 2 || Statistic's name | Value | +Epoch 2 |+=========================================+=======+ +Epoch 2 || Sparsity level of the whole model | 0.137 | +Epoch 2 |+-----------------------------------------+-------+ +Epoch 2 || Sparsity level of all sparsified layers | 0.177 | +Epoch 2 |+-----------------------------------------+-------+ +Epoch 2 | +Epoch 2 |Statistics by sparsified layers: +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 2 |+======================+================+================+=====================+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.115 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[0]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.117 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[0]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.129 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[0]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.133 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[0]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.205 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[0]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.212 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[0]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.113 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[1]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.114 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[1]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.126 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[1]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.132 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[1]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.205 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[1]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.212 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[1]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.117 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[2]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.118 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[2]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.130 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[2]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.134 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[2]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.206 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[2]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.213 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[2]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.113 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[3]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.114 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[3]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.126 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[3]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.129 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[3]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.206 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[3]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.214 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[3]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.113 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[4]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.113 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[4]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.120 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[4]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.125 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[4]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.207 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[4]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.217 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[4]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.112 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[5]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.112 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[5]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.122 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[5]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.124 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[5]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.206 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[5]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.216 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[5]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.111 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[6]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.111 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[6]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.122 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[6]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.124 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[6]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.206 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[6]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.214 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[6]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.110 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[7]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.111 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[7]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.120 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[7]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.121 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[7]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.204 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[7]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.211 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[7]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.110 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[8]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.110 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[8]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.116 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[8]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.117 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[8]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.202 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[8]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.207 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[8]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.107 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[9]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[qu | | | | +Epoch 2 || ery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.109 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[9]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[ke | | | | +Epoch 2 || y]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.113 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[9]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfAttention | | | | +Epoch 2 || [self]/NNCFLinear[va | | | | +Epoch 2 || lue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.113 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[9]/Be | | | | +Epoch 2 || rtAttention[attentio | | | | +Epoch 2 || n]/BertSelfOutput[ou | | | | +Epoch 2 || tput]/NNCFLinear[den | | | | +Epoch 2 || se]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.203 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[9]/Be | | | | +Epoch 2 || rtIntermediate[inter | | | | +Epoch 2 || mediate]/NNCFLinear[ | | | | +Epoch 2 || dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.208 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[9]/Be | | | | +Epoch 2 || rtOutput[output]/NNC | | | | +Epoch 2 || FLinear[dense]/linea | | | | +Epoch 2 || r_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.107 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[10]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfAttentio | | | | +Epoch 2 || n[self]/NNCFLinear[q | | | | +Epoch 2 || uery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.107 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[10]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfAttentio | | | | +Epoch 2 || n[self]/NNCFLinear[k | | | | +Epoch 2 || ey]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.116 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[10]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfAttentio | | | | +Epoch 2 || n[self]/NNCFLinear[v | | | | +Epoch 2 || alue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.114 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[10]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfOutput[o | | | | +Epoch 2 || utput]/NNCFLinear[de | | | | +Epoch 2 || nse]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.200 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[10]/B | | | | +Epoch 2 || ertIntermediate[inte | | | | +Epoch 2 || rmediate]/NNCFLinear | | | | +Epoch 2 || [dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.203 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[10]/B | | | | +Epoch 2 || ertOutput[output]/NN | | | | +Epoch 2 || CFLinear[dense]/line | | | | +Epoch 2 || ar_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.106 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[11]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfAttentio | | | | +Epoch 2 || n[self]/NNCFLinear[q | | | | +Epoch 2 || uery]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.106 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[11]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfAttentio | | | | +Epoch 2 || n[self]/NNCFLinear[k | | | | +Epoch 2 || ey]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.108 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[11]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfAttentio | | | | +Epoch 2 || n[self]/NNCFLinear[v | | | | +Epoch 2 || alue]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 768] | 0.106 | 0.694 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[11]/B | | | | +Epoch 2 || ertAttention[attenti | | | | +Epoch 2 || on]/BertSelfOutput[o | | | | +Epoch 2 || utput]/NNCFLinear[de | | | | +Epoch 2 || nse]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [3072, 768] | 0.198 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[11]/B | | | | +Epoch 2 || ertIntermediate[inte | | | | +Epoch 2 || rmediate]/NNCFLinear | | | | +Epoch 2 || [dense]/linear_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 || BertForSequenceClass | [768, 3072] | 0.200 | 2.778 | +Epoch 2 || ification/BertModel[ | | | | +Epoch 2 || bert]/BertEncoder[en | | | | +Epoch 2 || coder]/ModuleList[la | | | | +Epoch 2 || yer]/BertLayer[11]/B | | | | +Epoch 2 || ertOutput[output]/NN | | | | +Epoch 2 || CFLinear[dense]/line | | | | +Epoch 2 || ar_0 | | | | +Epoch 2 |+----------------------+----------------+----------------+---------------------+ +Epoch 2 | +Epoch 2 |Statistics of the magnitude sparsity algorithm: +Epoch 2 |+----------------------------------------------------------------------+-------+ +Epoch 2 || Statistic's name | Value | +Epoch 2 |+======================================================================+=======+ +Epoch 2 || A target level of the sparsity for the algorithm for the current | 0.177 | +Epoch 2 || epoch | | +Epoch 2 |+----------------------------------------------------------------------+-------+ +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || Layer's name | Sparsity threshold | +Epoch 2 |+=========================================================+====================+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 2 || near_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 2 || inear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 2 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 2 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 2 || near_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 2 || inear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 2 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 2 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 2 || near_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 2 || inear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 2 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 2 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 2 || near_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 2 || inear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 2 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 2 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 2 || near_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 2 || inear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 2 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 2 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 2 || near_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 2 || inear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 2 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 2 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 2 || near_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 2 || inear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 2 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 2 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 2 || near_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 2 || inear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 2 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 2 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 2 || near_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 2 || inear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 2 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 2 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 2 || near_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 2 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 2 || inear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 2 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 2 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 2 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 2 || /linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 2 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 2 || inear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 2 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 2 || /linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 2 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 2 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 2 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 2 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 2 || /linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 2 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 2 || inear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 2 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 2 || /linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 2 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 2 || linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 2 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +Epoch 2 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 2 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 2 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 2 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 3 |+--------------------------------+-------+ +Epoch 3 || Statistic's name | Value | +Epoch 3 |+================================+=======+ +Epoch 3 || Ratio of enabled quantizations | 100 | +Epoch 3 |+--------------------------------+-------+ +Epoch 3 | +Epoch 3 |Statistics of the quantization share: +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Statistic's name | Value | +Epoch 3 |+==================================+====================+ +Epoch 3 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 3 |+----------------------------------+--------------------+ +Epoch 3 | +Epoch 3 |Statistics of the bitwidth distribution: +Epoch 3 |+--------------+---------------------+--------------------+--------------------+ +Epoch 3 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 3 || | WQs | Placed AQs | Qs | +Epoch 3 |+==============+=====================+====================+====================+ +Epoch 3 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 3 || | | | 173) | +Epoch 3 |+--------------+---------------------+--------------------+--------------------+ +Epoch 3 | +Epoch 3 |Statistics of the sparsified model: +Epoch 3 |+-----------------------------------------+-------+ +Epoch 3 || Statistic's name | Value | +Epoch 3 |+=========================================+=======+ +Epoch 3 || Sparsity level of the whole model | 0.198 | +Epoch 3 |+-----------------------------------------+-------+ +Epoch 3 || Sparsity level of all sparsified layers | 0.255 | +Epoch 3 |+-----------------------------------------+-------+ +Epoch 3 | +Epoch 3 |Statistics by sparsified layers: +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 3 |+======================+================+================+=====================+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.162 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[0]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.165 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[0]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.174 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[0]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.180 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[0]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.297 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[0]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.306 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[0]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.160 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[1]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.161 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[1]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.172 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[1]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.179 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[1]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.297 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[1]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.307 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[1]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.165 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[2]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.166 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[2]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.175 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[2]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.180 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[2]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.298 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[2]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.308 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[2]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.157 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[3]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.159 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[3]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.170 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[3]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.174 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[3]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.298 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[3]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.310 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[3]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.158 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[4]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.158 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[4]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.165 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[4]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.171 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[4]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.300 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[4]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.314 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[4]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.157 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[5]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.158 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[5]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.168 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[5]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.170 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[5]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.299 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[5]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.313 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[5]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.156 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[6]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.157 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[6]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.168 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[6]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.171 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[6]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.299 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[6]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.310 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[6]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.155 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[7]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.156 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[7]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.164 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[7]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.167 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[7]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.296 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[7]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.306 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[7]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.155 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[8]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.155 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[8]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.161 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[8]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.162 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[8]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.293 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[8]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.302 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[8]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.153 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[9]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[qu | | | | +Epoch 3 || ery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.155 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[9]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[ke | | | | +Epoch 3 || y]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.159 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[9]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfAttention | | | | +Epoch 3 || [self]/NNCFLinear[va | | | | +Epoch 3 || lue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.159 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[9]/Be | | | | +Epoch 3 || rtAttention[attentio | | | | +Epoch 3 || n]/BertSelfOutput[ou | | | | +Epoch 3 || tput]/NNCFLinear[den | | | | +Epoch 3 || se]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.296 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[9]/Be | | | | +Epoch 3 || rtIntermediate[inter | | | | +Epoch 3 || mediate]/NNCFLinear[ | | | | +Epoch 3 || dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.303 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[9]/Be | | | | +Epoch 3 || rtOutput[output]/NNC | | | | +Epoch 3 || FLinear[dense]/linea | | | | +Epoch 3 || r_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.153 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[10]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfAttentio | | | | +Epoch 3 || n[self]/NNCFLinear[q | | | | +Epoch 3 || uery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.153 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[10]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfAttentio | | | | +Epoch 3 || n[self]/NNCFLinear[k | | | | +Epoch 3 || ey]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.165 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[10]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfAttentio | | | | +Epoch 3 || n[self]/NNCFLinear[v | | | | +Epoch 3 || alue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.161 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[10]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfOutput[o | | | | +Epoch 3 || utput]/NNCFLinear[de | | | | +Epoch 3 || nse]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.291 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[10]/B | | | | +Epoch 3 || ertIntermediate[inte | | | | +Epoch 3 || rmediate]/NNCFLinear | | | | +Epoch 3 || [dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.296 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[10]/B | | | | +Epoch 3 || ertOutput[output]/NN | | | | +Epoch 3 || CFLinear[dense]/line | | | | +Epoch 3 || ar_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.153 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[11]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfAttentio | | | | +Epoch 3 || n[self]/NNCFLinear[q | | | | +Epoch 3 || uery]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.152 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[11]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfAttentio | | | | +Epoch 3 || n[self]/NNCFLinear[k | | | | +Epoch 3 || ey]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.157 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[11]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfAttentio | | | | +Epoch 3 || n[self]/NNCFLinear[v | | | | +Epoch 3 || alue]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 768] | 0.153 | 0.694 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[11]/B | | | | +Epoch 3 || ertAttention[attenti | | | | +Epoch 3 || on]/BertSelfOutput[o | | | | +Epoch 3 || utput]/NNCFLinear[de | | | | +Epoch 3 || nse]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [3072, 768] | 0.290 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[11]/B | | | | +Epoch 3 || ertIntermediate[inte | | | | +Epoch 3 || rmediate]/NNCFLinear | | | | +Epoch 3 || [dense]/linear_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 || BertForSequenceClass | [768, 3072] | 0.292 | 2.778 | +Epoch 3 || ification/BertModel[ | | | | +Epoch 3 || bert]/BertEncoder[en | | | | +Epoch 3 || coder]/ModuleList[la | | | | +Epoch 3 || yer]/BertLayer[11]/B | | | | +Epoch 3 || ertOutput[output]/NN | | | | +Epoch 3 || CFLinear[dense]/line | | | | +Epoch 3 || ar_0 | | | | +Epoch 3 |+----------------------+----------------+----------------+---------------------+ +Epoch 3 | +Epoch 3 |Statistics of the magnitude sparsity algorithm: +Epoch 3 |+----------------------------------------------------------------------+-------+ +Epoch 3 || Statistic's name | Value | +Epoch 3 |+======================================================================+=======+ +Epoch 3 || A target level of the sparsity for the algorithm for the current | 0.255 | +Epoch 3 || epoch | | +Epoch 3 |+----------------------------------------------------------------------+-------+ +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || Layer's name | Sparsity threshold | +Epoch 3 |+=========================================================+====================+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 3 || near_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 3 || inear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 3 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 3 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 3 || near_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 3 || inear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 3 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 3 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 3 || near_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 3 || inear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 3 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 3 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 3 || near_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 3 || inear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 3 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 3 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 3 || near_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 3 || inear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 3 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 3 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 3 || near_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 3 || inear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 3 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 3 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 3 || near_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 3 || inear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 3 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 3 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 3 || near_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 3 || inear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 3 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 3 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 3 || near_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 3 || inear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 3 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 3 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 3 || near_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 3 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 3 || inear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 3 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 3 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 3 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 3 || /linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 3 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 3 || inear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 3 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 3 || /linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 3 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 3 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 3 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 3 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 3 || /linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 3 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 3 || inear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 3 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 3 || /linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 3 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 3 || linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 3 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +Epoch 3 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 3 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 3 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 3 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 4 |+--------------------------------+-------+ +Epoch 4 || Statistic's name | Value | +Epoch 4 |+================================+=======+ +Epoch 4 || Ratio of enabled quantizations | 100 | +Epoch 4 |+--------------------------------+-------+ +Epoch 4 | +Epoch 4 |Statistics of the quantization share: +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Statistic's name | Value | +Epoch 4 |+==================================+====================+ +Epoch 4 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 4 |+----------------------------------+--------------------+ +Epoch 4 | +Epoch 4 |Statistics of the bitwidth distribution: +Epoch 4 |+--------------+---------------------+--------------------+--------------------+ +Epoch 4 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 4 || | WQs | Placed AQs | Qs | +Epoch 4 |+==============+=====================+====================+====================+ +Epoch 4 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 4 || | | | 173) | +Epoch 4 |+--------------+---------------------+--------------------+--------------------+ +Epoch 4 | +Epoch 4 |Statistics of the sparsified model: +Epoch 4 |+-----------------------------------------+-------+ +Epoch 4 || Statistic's name | Value | +Epoch 4 |+=========================================+=======+ +Epoch 4 || Sparsity level of the whole model | 0.253 | +Epoch 4 |+-----------------------------------------+-------+ +Epoch 4 || Sparsity level of all sparsified layers | 0.326 | +Epoch 4 |+-----------------------------------------+-------+ +Epoch 4 | +Epoch 4 |Statistics by sparsified layers: +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 4 |+======================+================+================+=====================+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.205 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[0]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.209 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[0]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.214 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[0]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.221 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[0]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.381 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[0]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.391 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[0]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.202 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[1]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.204 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[1]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.211 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[1]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.221 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[1]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.381 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[1]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.393 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[1]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.211 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[2]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.211 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[2]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.215 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[2]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.221 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[2]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.383 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[2]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.394 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[2]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.200 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[3]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.202 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[3]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.212 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[3]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.216 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[3]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.383 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[3]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.398 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[3]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.200 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[4]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.200 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[4]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.207 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[4]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.213 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[4]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.385 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[4]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.401 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[4]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.200 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[5]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.200 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[5]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.210 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[5]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.212 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[5]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.384 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[5]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.401 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[5]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.199 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[6]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.198 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[6]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.209 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[6]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.213 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[6]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.383 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[6]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.397 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[6]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.198 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[7]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.199 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[7]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.207 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[7]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.209 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[7]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.380 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[7]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.392 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[7]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.198 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[8]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.198 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[8]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.203 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[8]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.204 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[8]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.377 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[8]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.387 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[8]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.196 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[9]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[qu | | | | +Epoch 4 || ery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.198 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[9]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[ke | | | | +Epoch 4 || y]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.201 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[9]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfAttention | | | | +Epoch 4 || [self]/NNCFLinear[va | | | | +Epoch 4 || lue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.201 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[9]/Be | | | | +Epoch 4 || rtAttention[attentio | | | | +Epoch 4 || n]/BertSelfOutput[ou | | | | +Epoch 4 || tput]/NNCFLinear[den | | | | +Epoch 4 || se]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.380 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[9]/Be | | | | +Epoch 4 || rtIntermediate[inter | | | | +Epoch 4 || mediate]/NNCFLinear[ | | | | +Epoch 4 || dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.388 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[9]/Be | | | | +Epoch 4 || rtOutput[output]/NNC | | | | +Epoch 4 || FLinear[dense]/linea | | | | +Epoch 4 || r_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.196 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[10]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfAttentio | | | | +Epoch 4 || n[self]/NNCFLinear[q | | | | +Epoch 4 || uery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.197 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[10]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfAttentio | | | | +Epoch 4 || n[self]/NNCFLinear[k | | | | +Epoch 4 || ey]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.209 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[10]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfAttentio | | | | +Epoch 4 || n[self]/NNCFLinear[v | | | | +Epoch 4 || alue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.204 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[10]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfOutput[o | | | | +Epoch 4 || utput]/NNCFLinear[de | | | | +Epoch 4 || nse]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.375 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[10]/B | | | | +Epoch 4 || ertIntermediate[inte | | | | +Epoch 4 || rmediate]/NNCFLinear | | | | +Epoch 4 || [dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.381 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[10]/B | | | | +Epoch 4 || ertOutput[output]/NN | | | | +Epoch 4 || CFLinear[dense]/line | | | | +Epoch 4 || ar_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.197 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[11]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfAttentio | | | | +Epoch 4 || n[self]/NNCFLinear[q | | | | +Epoch 4 || uery]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.196 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[11]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfAttentio | | | | +Epoch 4 || n[self]/NNCFLinear[k | | | | +Epoch 4 || ey]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.202 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[11]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfAttentio | | | | +Epoch 4 || n[self]/NNCFLinear[v | | | | +Epoch 4 || alue]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 768] | 0.197 | 0.694 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[11]/B | | | | +Epoch 4 || ertAttention[attenti | | | | +Epoch 4 || on]/BertSelfOutput[o | | | | +Epoch 4 || utput]/NNCFLinear[de | | | | +Epoch 4 || nse]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [3072, 768] | 0.374 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[11]/B | | | | +Epoch 4 || ertIntermediate[inte | | | | +Epoch 4 || rmediate]/NNCFLinear | | | | +Epoch 4 || [dense]/linear_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 || BertForSequenceClass | [768, 3072] | 0.377 | 2.778 | +Epoch 4 || ification/BertModel[ | | | | +Epoch 4 || bert]/BertEncoder[en | | | | +Epoch 4 || coder]/ModuleList[la | | | | +Epoch 4 || yer]/BertLayer[11]/B | | | | +Epoch 4 || ertOutput[output]/NN | | | | +Epoch 4 || CFLinear[dense]/line | | | | +Epoch 4 || ar_0 | | | | +Epoch 4 |+----------------------+----------------+----------------+---------------------+ +Epoch 4 | +Epoch 4 |Statistics of the magnitude sparsity algorithm: +Epoch 4 |+----------------------------------------------------------------------+-------+ +Epoch 4 || Statistic's name | Value | +Epoch 4 |+======================================================================+=======+ +Epoch 4 || A target level of the sparsity for the algorithm for the current | 0.326 | +Epoch 4 || epoch | | +Epoch 4 |+----------------------------------------------------------------------+-------+ +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || Layer's name | Sparsity threshold | +Epoch 4 |+=========================================================+====================+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 4 || near_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 4 || inear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 4 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 4 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 4 || near_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 4 || inear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 4 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 4 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 4 || near_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 4 || inear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 4 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 4 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 4 || near_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 4 || inear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 4 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 4 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 4 || near_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 4 || inear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 4 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 4 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 4 || near_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 4 || inear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 4 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 4 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 4 || near_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 4 || inear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 4 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 4 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 4 || near_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 4 || inear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 4 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 4 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 4 || near_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 4 || inear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 4 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 4 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 4 || near_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 4 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 4 || inear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 4 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 4 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 4 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 4 || /linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 4 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 4 || inear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 4 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 4 || /linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 4 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 4 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 4 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 4 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 4 || /linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 4 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 4 || inear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 4 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 4 || /linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 4 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 4 || linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 4 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +Epoch 4 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 4 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 4 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 4 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 5 |+--------------------------------+-------+ +Epoch 5 || Statistic's name | Value | +Epoch 5 |+================================+=======+ +Epoch 5 || Ratio of enabled quantizations | 100 | +Epoch 5 |+--------------------------------+-------+ +Epoch 5 | +Epoch 5 |Statistics of the quantization share: +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Statistic's name | Value | +Epoch 5 |+==================================+====================+ +Epoch 5 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 5 |+----------------------------------+--------------------+ +Epoch 5 | +Epoch 5 |Statistics of the bitwidth distribution: +Epoch 5 |+--------------+---------------------+--------------------+--------------------+ +Epoch 5 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 5 || | WQs | Placed AQs | Qs | +Epoch 5 |+==============+=====================+====================+====================+ +Epoch 5 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 5 || | | | 173) | +Epoch 5 |+--------------+---------------------+--------------------+--------------------+ +Epoch 5 | +Epoch 5 |Statistics of the sparsified model: +Epoch 5 |+-----------------------------------------+-------+ +Epoch 5 || Statistic's name | Value | +Epoch 5 |+=========================================+=======+ +Epoch 5 || Sparsity level of the whole model | 0.303 | +Epoch 5 |+-----------------------------------------+-------+ +Epoch 5 || Sparsity level of all sparsified layers | 0.390 | +Epoch 5 |+-----------------------------------------+-------+ +Epoch 5 | +Epoch 5 |Statistics by sparsified layers: +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 5 |+======================+================+================+=====================+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.246 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[0]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[qu | | | | +Epoch 5 || ery]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.251 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[0]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[ke | | | | +Epoch 5 || y]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.251 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[0]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[va | | | | +Epoch 5 || lue]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.259 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[0]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfOutput[ou | | | | +Epoch 5 || tput]/NNCFLinear[den | | | | +Epoch 5 || se]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [3072, 768] | 0.457 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[0]/Be | | | | +Epoch 5 || rtIntermediate[inter | | | | +Epoch 5 || mediate]/NNCFLinear[ | | | | +Epoch 5 || dense]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 3072] | 0.468 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[0]/Be | | | | +Epoch 5 || rtOutput[output]/NNC | | | | +Epoch 5 || FLinear[dense]/linea | | | | +Epoch 5 || r_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.243 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[1]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[qu | | | | +Epoch 5 || ery]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.246 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[1]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[ke | | | | +Epoch 5 || y]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.248 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[1]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[va | | | | +Epoch 5 || lue]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.258 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[1]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfOutput[ou | | | | +Epoch 5 || tput]/NNCFLinear[den | | | | +Epoch 5 || se]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [3072, 768] | 0.457 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[1]/Be | | | | +Epoch 5 || rtIntermediate[inter | | | | +Epoch 5 || mediate]/NNCFLinear[ | | | | +Epoch 5 || dense]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 3072] | 0.470 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[1]/Be | | | | +Epoch 5 || rtOutput[output]/NNC | | | | +Epoch 5 || FLinear[dense]/linea | | | | +Epoch 5 || r_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.255 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[2]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[qu | | | | +Epoch 5 || ery]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.255 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[2]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[ke | | | | +Epoch 5 || y]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.251 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[2]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[va | | | | +Epoch 5 || lue]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.258 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[2]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfOutput[ou | | | | +Epoch 5 || tput]/NNCFLinear[den | | | | +Epoch 5 || se]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [3072, 768] | 0.459 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[2]/Be | | | | +Epoch 5 || rtIntermediate[inter | | | | +Epoch 5 || mediate]/NNCFLinear[ | | | | +Epoch 5 || dense]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 3072] | 0.471 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[2]/Be | | | | +Epoch 5 || rtOutput[output]/NNC | | | | +Epoch 5 || FLinear[dense]/linea | | | | +Epoch 5 || r_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.241 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[3]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[qu | | | | +Epoch 5 || ery]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.244 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[3]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[ke | | | | +Epoch 5 || y]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.251 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[3]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[va | | | | +Epoch 5 || lue]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.253 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[3]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfOutput[ou | | | | +Epoch 5 || tput]/NNCFLinear[den | | | | +Epoch 5 || se]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [3072, 768] | 0.460 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[3]/Be | | | | +Epoch 5 || rtIntermediate[inter | | | | +Epoch 5 || mediate]/NNCFLinear[ | | | | +Epoch 5 || dense]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 3072] | 0.476 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[3]/Be | | | | +Epoch 5 || rtOutput[output]/NNC | | | | +Epoch 5 || FLinear[dense]/linea | | | | +Epoch 5 || r_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.240 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[4]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[qu | | | | +Epoch 5 || ery]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.240 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[4]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[ke | | | | +Epoch 5 || y]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.245 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[4]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[va | | | | +Epoch 5 || lue]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.252 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[4]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfOutput[ou | | | | +Epoch 5 || tput]/NNCFLinear[den | | | | +Epoch 5 || se]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [3072, 768] | 0.462 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[4]/Be | | | | +Epoch 5 || rtIntermediate[inter | | | | +Epoch 5 || mediate]/NNCFLinear[ | | | | +Epoch 5 || dense]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 3072] | 0.480 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[4]/Be | | | | +Epoch 5 || rtOutput[output]/NNC | | | | +Epoch 5 || FLinear[dense]/linea | | | | +Epoch 5 || r_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.240 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[5]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[qu | | | | +Epoch 5 || ery]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.241 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[5]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[ke | | | | +Epoch 5 || y]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.250 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[5]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[va | | | | +Epoch 5 || lue]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.251 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[5]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfOutput[ou | | | | +Epoch 5 || tput]/NNCFLinear[den | | | | +Epoch 5 || se]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [3072, 768] | 0.461 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[5]/Be | | | | +Epoch 5 || rtIntermediate[inter | | | | +Epoch 5 || mediate]/NNCFLinear[ | | | | +Epoch 5 || dense]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 3072] | 0.479 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[5]/Be | | | | +Epoch 5 || rtOutput[output]/NNC | | | | +Epoch 5 || FLinear[dense]/linea | | | | +Epoch 5 || r_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.239 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[6]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[qu | | | | +Epoch 5 || ery]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.239 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[6]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[ke | | | | +Epoch 5 || y]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.249 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[6]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[va | | | | +Epoch 5 || lue]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.253 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[6]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfOutput[ou | | | | +Epoch 5 || tput]/NNCFLinear[den | | | | +Epoch 5 || se]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [3072, 768] | 0.460 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[6]/Be | | | | +Epoch 5 || rtIntermediate[inter | | | | +Epoch 5 || mediate]/NNCFLinear[ | | | | +Epoch 5 || dense]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 3072] | 0.476 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[6]/Be | | | | +Epoch 5 || rtOutput[output]/NNC | | | | +Epoch 5 || FLinear[dense]/linea | | | | +Epoch 5 || r_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.238 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[7]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[qu | | | | +Epoch 5 || ery]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.238 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[7]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[ke | | | | +Epoch 5 || y]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.245 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[7]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[va | | | | +Epoch 5 || lue]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.248 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[7]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfOutput[ou | | | | +Epoch 5 || tput]/NNCFLinear[den | | | | +Epoch 5 || se]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [3072, 768] | 0.456 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[7]/Be | | | | +Epoch 5 || rtIntermediate[inter | | | | +Epoch 5 || mediate]/NNCFLinear[ | | | | +Epoch 5 || dense]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 3072] | 0.469 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[7]/Be | | | | +Epoch 5 || rtOutput[output]/NNC | | | | +Epoch 5 || FLinear[dense]/linea | | | | +Epoch 5 || r_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.238 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[8]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[qu | | | | +Epoch 5 || ery]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.239 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[8]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[ke | | | | +Epoch 5 || y]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.242 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[8]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[va | | | | +Epoch 5 || lue]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.244 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[8]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfOutput[ou | | | | +Epoch 5 || tput]/NNCFLinear[den | | | | +Epoch 5 || se]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [3072, 768] | 0.453 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[8]/Be | | | | +Epoch 5 || rtIntermediate[inter | | | | +Epoch 5 || mediate]/NNCFLinear[ | | | | +Epoch 5 || dense]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 3072] | 0.464 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[8]/Be | | | | +Epoch 5 || rtOutput[output]/NNC | | | | +Epoch 5 || FLinear[dense]/linea | | | | +Epoch 5 || r_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.236 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[9]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[qu | | | | +Epoch 5 || ery]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.238 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[9]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[ke | | | | +Epoch 5 || y]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.241 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[9]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfAttention | | | | +Epoch 5 || [self]/NNCFLinear[va | | | | +Epoch 5 || lue]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.240 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[9]/Be | | | | +Epoch 5 || rtAttention[attentio | | | | +Epoch 5 || n]/BertSelfOutput[ou | | | | +Epoch 5 || tput]/NNCFLinear[den | | | | +Epoch 5 || se]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [3072, 768] | 0.457 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[9]/Be | | | | +Epoch 5 || rtIntermediate[inter | | | | +Epoch 5 || mediate]/NNCFLinear[ | | | | +Epoch 5 || dense]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 3072] | 0.466 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[9]/Be | | | | +Epoch 5 || rtOutput[output]/NNC | | | | +Epoch 5 || FLinear[dense]/linea | | | | +Epoch 5 || r_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.237 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[10]/B | | | | +Epoch 5 || ertAttention[attenti | | | | +Epoch 5 || on]/BertSelfAttentio | | | | +Epoch 5 || n[self]/NNCFLinear[q | | | | +Epoch 5 || uery]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.238 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[10]/B | | | | +Epoch 5 || ertAttention[attenti | | | | +Epoch 5 || on]/BertSelfAttentio | | | | +Epoch 5 || n[self]/NNCFLinear[k | | | | +Epoch 5 || ey]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.250 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[10]/B | | | | +Epoch 5 || ertAttention[attenti | | | | +Epoch 5 || on]/BertSelfAttentio | | | | +Epoch 5 || n[self]/NNCFLinear[v | | | | +Epoch 5 || alue]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.244 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[10]/B | | | | +Epoch 5 || ertAttention[attenti | | | | +Epoch 5 || on]/BertSelfOutput[o | | | | +Epoch 5 || utput]/NNCFLinear[de | | | | +Epoch 5 || nse]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [3072, 768] | 0.451 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[10]/B | | | | +Epoch 5 || ertIntermediate[inte | | | | +Epoch 5 || rmediate]/NNCFLinear | | | | +Epoch 5 || [dense]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 3072] | 0.458 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[10]/B | | | | +Epoch 5 || ertOutput[output]/NN | | | | +Epoch 5 || CFLinear[dense]/line | | | | +Epoch 5 || ar_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.239 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[11]/B | | | | +Epoch 5 || ertAttention[attenti | | | | +Epoch 5 || on]/BertSelfAttentio | | | | +Epoch 5 || n[self]/NNCFLinear[q | | | | +Epoch 5 || uery]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.237 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[11]/B | | | | +Epoch 5 || ertAttention[attenti | | | | +Epoch 5 || on]/BertSelfAttentio | | | | +Epoch 5 || n[self]/NNCFLinear[k | | | | +Epoch 5 || ey]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.243 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[11]/B | | | | +Epoch 5 || ertAttention[attenti | | | | +Epoch 5 || on]/BertSelfAttentio | | | | +Epoch 5 || n[self]/NNCFLinear[v | | | | +Epoch 5 || alue]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 768] | 0.237 | 0.694 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[11]/B | | | | +Epoch 5 || ertAttention[attenti | | | | +Epoch 5 || on]/BertSelfOutput[o | | | | +Epoch 5 || utput]/NNCFLinear[de | | | | +Epoch 5 || nse]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [3072, 768] | 0.450 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[11]/B | | | | +Epoch 5 || ertIntermediate[inte | | | | +Epoch 5 || rmediate]/NNCFLinear | | | | +Epoch 5 || [dense]/linear_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 || BertForSequenceClass | [768, 3072] | 0.453 | 2.778 | +Epoch 5 || ification/BertModel[ | | | | +Epoch 5 || bert]/BertEncoder[en | | | | +Epoch 5 || coder]/ModuleList[la | | | | +Epoch 5 || yer]/BertLayer[11]/B | | | | +Epoch 5 || ertOutput[output]/NN | | | | +Epoch 5 || CFLinear[dense]/line | | | | +Epoch 5 || ar_0 | | | | +Epoch 5 |+----------------------+----------------+----------------+---------------------+ +Epoch 5 | +Epoch 5 |Statistics of the magnitude sparsity algorithm: +Epoch 5 |+----------------------------------------------------------------------+-------+ +Epoch 5 || Statistic's name | Value | +Epoch 5 |+======================================================================+=======+ +Epoch 5 || A target level of the sparsity for the algorithm for the current | 0.390 | +Epoch 5 || epoch | | +Epoch 5 |+----------------------------------------------------------------------+-------+ +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || Layer's name | Sparsity threshold | +Epoch 5 |+=========================================================+====================+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 5 || near_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 5 || inear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 5 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 5 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 5 || near_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 5 || inear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 5 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 5 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 5 || near_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 5 || inear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 5 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 5 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 5 || near_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 5 || inear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 5 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 5 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 5 || near_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 5 || inear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 5 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 5 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 5 || near_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 5 || inear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 5 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 5 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 5 || near_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 5 || inear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 5 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 5 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 5 || near_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 5 || inear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 5 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 5 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 5 || near_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 5 || inear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 5 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 5 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 5 || near_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 5 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 5 || inear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 5 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 5 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 5 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 5 || /linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 5 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 5 || inear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 5 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 5 || /linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 5 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 5 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 5 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 5 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 5 || /linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 5 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 5 || inear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 5 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 5 || /linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 5 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 5 || linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 5 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +Epoch 5 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 5 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 5 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 5 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 6 |+--------------------------------+-------+ +Epoch 6 || Statistic's name | Value | +Epoch 6 |+================================+=======+ +Epoch 6 || Ratio of enabled quantizations | 100 | +Epoch 6 |+--------------------------------+-------+ +Epoch 6 | +Epoch 6 |Statistics of the quantization share: +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Statistic's name | Value | +Epoch 6 |+==================================+====================+ +Epoch 6 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 6 |+----------------------------------+--------------------+ +Epoch 6 | +Epoch 6 |Statistics of the bitwidth distribution: +Epoch 6 |+--------------+---------------------+--------------------+--------------------+ +Epoch 6 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 6 || | WQs | Placed AQs | Qs | +Epoch 6 |+==============+=====================+====================+====================+ +Epoch 6 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 6 || | | | 173) | +Epoch 6 |+--------------+---------------------+--------------------+--------------------+ +Epoch 6 | +Epoch 6 |Statistics of the sparsified model: +Epoch 6 |+-----------------------------------------+-------+ +Epoch 6 || Statistic's name | Value | +Epoch 6 |+=========================================+=======+ +Epoch 6 || Sparsity level of the whole model | 0.348 | +Epoch 6 |+-----------------------------------------+-------+ +Epoch 6 || Sparsity level of all sparsified layers | 0.449 | +Epoch 6 |+-----------------------------------------+-------+ +Epoch 6 | +Epoch 6 |Statistics by sparsified layers: +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 6 |+======================+================+================+=====================+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.286 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[0]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[qu | | | | +Epoch 6 || ery]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.291 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[0]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[ke | | | | +Epoch 6 || y]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.287 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[0]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[va | | | | +Epoch 6 || lue]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.297 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[0]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfOutput[ou | | | | +Epoch 6 || tput]/NNCFLinear[den | | | | +Epoch 6 || se]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [3072, 768] | 0.524 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[0]/Be | | | | +Epoch 6 || rtIntermediate[inter | | | | +Epoch 6 || mediate]/NNCFLinear[ | | | | +Epoch 6 || dense]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 3072] | 0.536 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[0]/Be | | | | +Epoch 6 || rtOutput[output]/NNC | | | | +Epoch 6 || FLinear[dense]/linea | | | | +Epoch 6 || r_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.282 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[1]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[qu | | | | +Epoch 6 || ery]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.285 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[1]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[ke | | | | +Epoch 6 || y]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.284 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[1]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[va | | | | +Epoch 6 || lue]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.296 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[1]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfOutput[ou | | | | +Epoch 6 || tput]/NNCFLinear[den | | | | +Epoch 6 || se]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [3072, 768] | 0.525 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[1]/Be | | | | +Epoch 6 || rtIntermediate[inter | | | | +Epoch 6 || mediate]/NNCFLinear[ | | | | +Epoch 6 || dense]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 3072] | 0.539 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[1]/Be | | | | +Epoch 6 || rtOutput[output]/NNC | | | | +Epoch 6 || FLinear[dense]/linea | | | | +Epoch 6 || r_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.296 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[2]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[qu | | | | +Epoch 6 || ery]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.296 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[2]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[ke | | | | +Epoch 6 || y]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.288 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[2]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[va | | | | +Epoch 6 || lue]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.294 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[2]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfOutput[ou | | | | +Epoch 6 || tput]/NNCFLinear[den | | | | +Epoch 6 || se]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [3072, 768] | 0.527 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[2]/Be | | | | +Epoch 6 || rtIntermediate[inter | | | | +Epoch 6 || mediate]/NNCFLinear[ | | | | +Epoch 6 || dense]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 3072] | 0.540 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[2]/Be | | | | +Epoch 6 || rtOutput[output]/NNC | | | | +Epoch 6 || FLinear[dense]/linea | | | | +Epoch 6 || r_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.281 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[3]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[qu | | | | +Epoch 6 || ery]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.283 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[3]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[ke | | | | +Epoch 6 || y]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.288 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[3]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[va | | | | +Epoch 6 || lue]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.291 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[3]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfOutput[ou | | | | +Epoch 6 || tput]/NNCFLinear[den | | | | +Epoch 6 || se]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [3072, 768] | 0.528 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[3]/Be | | | | +Epoch 6 || rtIntermediate[inter | | | | +Epoch 6 || mediate]/NNCFLinear[ | | | | +Epoch 6 || dense]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 3072] | 0.545 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[3]/Be | | | | +Epoch 6 || rtOutput[output]/NNC | | | | +Epoch 6 || FLinear[dense]/linea | | | | +Epoch 6 || r_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.280 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[4]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[qu | | | | +Epoch 6 || ery]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.278 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[4]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[ke | | | | +Epoch 6 || y]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.284 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[4]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[va | | | | +Epoch 6 || lue]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.290 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[4]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfOutput[ou | | | | +Epoch 6 || tput]/NNCFLinear[den | | | | +Epoch 6 || se]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [3072, 768] | 0.530 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[4]/Be | | | | +Epoch 6 || rtIntermediate[inter | | | | +Epoch 6 || mediate]/NNCFLinear[ | | | | +Epoch 6 || dense]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 3072] | 0.549 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[4]/Be | | | | +Epoch 6 || rtOutput[output]/NNC | | | | +Epoch 6 || FLinear[dense]/linea | | | | +Epoch 6 || r_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.279 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[5]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[qu | | | | +Epoch 6 || ery]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.280 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[5]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[ke | | | | +Epoch 6 || y]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.289 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[5]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[va | | | | +Epoch 6 || lue]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.290 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[5]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfOutput[ou | | | | +Epoch 6 || tput]/NNCFLinear[den | | | | +Epoch 6 || se]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [3072, 768] | 0.529 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[5]/Be | | | | +Epoch 6 || rtIntermediate[inter | | | | +Epoch 6 || mediate]/NNCFLinear[ | | | | +Epoch 6 || dense]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 3072] | 0.548 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[5]/Be | | | | +Epoch 6 || rtOutput[output]/NNC | | | | +Epoch 6 || FLinear[dense]/linea | | | | +Epoch 6 || r_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.278 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[6]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[qu | | | | +Epoch 6 || ery]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.278 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[6]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[ke | | | | +Epoch 6 || y]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.288 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[6]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[va | | | | +Epoch 6 || lue]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.290 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[6]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfOutput[ou | | | | +Epoch 6 || tput]/NNCFLinear[den | | | | +Epoch 6 || se]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [3072, 768] | 0.528 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[6]/Be | | | | +Epoch 6 || rtIntermediate[inter | | | | +Epoch 6 || mediate]/NNCFLinear[ | | | | +Epoch 6 || dense]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 3072] | 0.544 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[6]/Be | | | | +Epoch 6 || rtOutput[output]/NNC | | | | +Epoch 6 || FLinear[dense]/linea | | | | +Epoch 6 || r_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.278 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[7]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[qu | | | | +Epoch 6 || ery]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.277 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[7]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[ke | | | | +Epoch 6 || y]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.284 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[7]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[va | | | | +Epoch 6 || lue]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.287 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[7]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfOutput[ou | | | | +Epoch 6 || tput]/NNCFLinear[den | | | | +Epoch 6 || se]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [3072, 768] | 0.524 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[7]/Be | | | | +Epoch 6 || rtIntermediate[inter | | | | +Epoch 6 || mediate]/NNCFLinear[ | | | | +Epoch 6 || dense]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 3072] | 0.537 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[7]/Be | | | | +Epoch 6 || rtOutput[output]/NNC | | | | +Epoch 6 || FLinear[dense]/linea | | | | +Epoch 6 || r_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.277 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[8]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[qu | | | | +Epoch 6 || ery]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.278 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[8]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[ke | | | | +Epoch 6 || y]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.281 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[8]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[va | | | | +Epoch 6 || lue]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.283 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[8]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfOutput[ou | | | | +Epoch 6 || tput]/NNCFLinear[den | | | | +Epoch 6 || se]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [3072, 768] | 0.521 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[8]/Be | | | | +Epoch 6 || rtIntermediate[inter | | | | +Epoch 6 || mediate]/NNCFLinear[ | | | | +Epoch 6 || dense]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 3072] | 0.533 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[8]/Be | | | | +Epoch 6 || rtOutput[output]/NNC | | | | +Epoch 6 || FLinear[dense]/linea | | | | +Epoch 6 || r_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.275 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[9]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[qu | | | | +Epoch 6 || ery]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.277 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[9]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[ke | | | | +Epoch 6 || y]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.279 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[9]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfAttention | | | | +Epoch 6 || [self]/NNCFLinear[va | | | | +Epoch 6 || lue]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.278 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[9]/Be | | | | +Epoch 6 || rtAttention[attentio | | | | +Epoch 6 || n]/BertSelfOutput[ou | | | | +Epoch 6 || tput]/NNCFLinear[den | | | | +Epoch 6 || se]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [3072, 768] | 0.525 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[9]/Be | | | | +Epoch 6 || rtIntermediate[inter | | | | +Epoch 6 || mediate]/NNCFLinear[ | | | | +Epoch 6 || dense]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 3072] | 0.535 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[9]/Be | | | | +Epoch 6 || rtOutput[output]/NNC | | | | +Epoch 6 || FLinear[dense]/linea | | | | +Epoch 6 || r_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.277 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[10]/B | | | | +Epoch 6 || ertAttention[attenti | | | | +Epoch 6 || on]/BertSelfAttentio | | | | +Epoch 6 || n[self]/NNCFLinear[q | | | | +Epoch 6 || uery]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.277 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[10]/B | | | | +Epoch 6 || ertAttention[attenti | | | | +Epoch 6 || on]/BertSelfAttentio | | | | +Epoch 6 || n[self]/NNCFLinear[k | | | | +Epoch 6 || ey]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.290 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[10]/B | | | | +Epoch 6 || ertAttention[attenti | | | | +Epoch 6 || on]/BertSelfAttentio | | | | +Epoch 6 || n[self]/NNCFLinear[v | | | | +Epoch 6 || alue]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.284 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[10]/B | | | | +Epoch 6 || ertAttention[attenti | | | | +Epoch 6 || on]/BertSelfOutput[o | | | | +Epoch 6 || utput]/NNCFLinear[de | | | | +Epoch 6 || nse]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [3072, 768] | 0.519 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[10]/B | | | | +Epoch 6 || ertIntermediate[inte | | | | +Epoch 6 || rmediate]/NNCFLinear | | | | +Epoch 6 || [dense]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 3072] | 0.526 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[10]/B | | | | +Epoch 6 || ertOutput[output]/NN | | | | +Epoch 6 || CFLinear[dense]/line | | | | +Epoch 6 || ar_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.279 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[11]/B | | | | +Epoch 6 || ertAttention[attenti | | | | +Epoch 6 || on]/BertSelfAttentio | | | | +Epoch 6 || n[self]/NNCFLinear[q | | | | +Epoch 6 || uery]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.276 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[11]/B | | | | +Epoch 6 || ertAttention[attenti | | | | +Epoch 6 || on]/BertSelfAttentio | | | | +Epoch 6 || n[self]/NNCFLinear[k | | | | +Epoch 6 || ey]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.282 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[11]/B | | | | +Epoch 6 || ertAttention[attenti | | | | +Epoch 6 || on]/BertSelfAttentio | | | | +Epoch 6 || n[self]/NNCFLinear[v | | | | +Epoch 6 || alue]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 768] | 0.276 | 0.694 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[11]/B | | | | +Epoch 6 || ertAttention[attenti | | | | +Epoch 6 || on]/BertSelfOutput[o | | | | +Epoch 6 || utput]/NNCFLinear[de | | | | +Epoch 6 || nse]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [3072, 768] | 0.518 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[11]/B | | | | +Epoch 6 || ertIntermediate[inte | | | | +Epoch 6 || rmediate]/NNCFLinear | | | | +Epoch 6 || [dense]/linear_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 || BertForSequenceClass | [768, 3072] | 0.521 | 2.778 | +Epoch 6 || ification/BertModel[ | | | | +Epoch 6 || bert]/BertEncoder[en | | | | +Epoch 6 || coder]/ModuleList[la | | | | +Epoch 6 || yer]/BertLayer[11]/B | | | | +Epoch 6 || ertOutput[output]/NN | | | | +Epoch 6 || CFLinear[dense]/line | | | | +Epoch 6 || ar_0 | | | | +Epoch 6 |+----------------------+----------------+----------------+---------------------+ +Epoch 6 | +Epoch 6 |Statistics of the magnitude sparsity algorithm: +Epoch 6 |+----------------------------------------------------------------------+-------+ +Epoch 6 || Statistic's name | Value | +Epoch 6 |+======================================================================+=======+ +Epoch 6 || A target level of the sparsity for the algorithm for the current | 0.449 | +Epoch 6 || epoch | | +Epoch 6 |+----------------------------------------------------------------------+-------+ +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || Layer's name | Sparsity threshold | +Epoch 6 |+=========================================================+====================+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 6 || near_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 6 || inear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 6 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 6 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 6 || near_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 6 || inear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 6 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 6 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 6 || near_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 6 || inear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 6 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 6 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 6 || near_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 6 || inear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 6 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 6 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 6 || near_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 6 || inear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 6 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 6 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 6 || near_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 6 || inear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 6 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 6 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 6 || near_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 6 || inear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 6 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 6 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 6 || near_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 6 || inear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 6 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 6 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 6 || near_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 6 || inear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 6 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 6 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 6 || near_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 6 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 6 || inear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 6 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 6 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 6 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 6 || /linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 6 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 6 || inear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 6 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 6 || /linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 6 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 6 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 6 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 6 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 6 || /linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 6 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 6 || inear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 6 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 6 || /linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 6 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 6 || linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 6 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +Epoch 6 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.000 | +Epoch 6 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 6 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 6 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 7 |+--------------------------------+-------+ +Epoch 7 || Statistic's name | Value | +Epoch 7 |+================================+=======+ +Epoch 7 || Ratio of enabled quantizations | 100 | +Epoch 7 |+--------------------------------+-------+ +Epoch 7 | +Epoch 7 |Statistics of the quantization share: +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Statistic's name | Value | +Epoch 7 |+==================================+====================+ +Epoch 7 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 7 |+----------------------------------+--------------------+ +Epoch 7 | +Epoch 7 |Statistics of the bitwidth distribution: +Epoch 7 |+--------------+---------------------+--------------------+--------------------+ +Epoch 7 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 7 || | WQs | Placed AQs | Qs | +Epoch 7 |+==============+=====================+====================+====================+ +Epoch 7 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 7 || | | | 173) | +Epoch 7 |+--------------+---------------------+--------------------+--------------------+ +Epoch 7 | +Epoch 7 |Statistics of the sparsified model: +Epoch 7 |+-----------------------------------------+-------+ +Epoch 7 || Statistic's name | Value | +Epoch 7 |+=========================================+=======+ +Epoch 7 || Sparsity level of the whole model | 0.389 | +Epoch 7 |+-----------------------------------------+-------+ +Epoch 7 || Sparsity level of all sparsified layers | 0.501 | +Epoch 7 |+-----------------------------------------+-------+ +Epoch 7 | +Epoch 7 |Statistics by sparsified layers: +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 7 |+======================+================+================+=====================+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.324 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[0]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[qu | | | | +Epoch 7 || ery]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.330 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[0]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[ke | | | | +Epoch 7 || y]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.324 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[0]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[va | | | | +Epoch 7 || lue]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.335 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[0]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfOutput[ou | | | | +Epoch 7 || tput]/NNCFLinear[den | | | | +Epoch 7 || se]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [3072, 768] | 0.584 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[0]/Be | | | | +Epoch 7 || rtIntermediate[inter | | | | +Epoch 7 || mediate]/NNCFLinear[ | | | | +Epoch 7 || dense]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 3072] | 0.596 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[0]/Be | | | | +Epoch 7 || rtOutput[output]/NNC | | | | +Epoch 7 || FLinear[dense]/linea | | | | +Epoch 7 || r_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.320 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[1]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[qu | | | | +Epoch 7 || ery]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.324 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[1]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[ke | | | | +Epoch 7 || y]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.321 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[1]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[va | | | | +Epoch 7 || lue]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.334 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[1]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfOutput[ou | | | | +Epoch 7 || tput]/NNCFLinear[den | | | | +Epoch 7 || se]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [3072, 768] | 0.586 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[1]/Be | | | | +Epoch 7 || rtIntermediate[inter | | | | +Epoch 7 || mediate]/NNCFLinear[ | | | | +Epoch 7 || dense]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 3072] | 0.600 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[1]/Be | | | | +Epoch 7 || rtOutput[output]/NNC | | | | +Epoch 7 || FLinear[dense]/linea | | | | +Epoch 7 || r_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.336 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[2]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[qu | | | | +Epoch 7 || ery]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.336 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[2]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[ke | | | | +Epoch 7 || y]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.325 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[2]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[va | | | | +Epoch 7 || lue]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.331 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[2]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfOutput[ou | | | | +Epoch 7 || tput]/NNCFLinear[den | | | | +Epoch 7 || se]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [3072, 768] | 0.587 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[2]/Be | | | | +Epoch 7 || rtIntermediate[inter | | | | +Epoch 7 || mediate]/NNCFLinear[ | | | | +Epoch 7 || dense]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 3072] | 0.600 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[2]/Be | | | | +Epoch 7 || rtOutput[output]/NNC | | | | +Epoch 7 || FLinear[dense]/linea | | | | +Epoch 7 || r_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.319 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[3]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[qu | | | | +Epoch 7 || ery]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.321 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[3]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[ke | | | | +Epoch 7 || y]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.326 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[3]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[va | | | | +Epoch 7 || lue]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.329 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[3]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfOutput[ou | | | | +Epoch 7 || tput]/NNCFLinear[den | | | | +Epoch 7 || se]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [3072, 768] | 0.588 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[3]/Be | | | | +Epoch 7 || rtIntermediate[inter | | | | +Epoch 7 || mediate]/NNCFLinear[ | | | | +Epoch 7 || dense]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 3072] | 0.605 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[3]/Be | | | | +Epoch 7 || rtOutput[output]/NNC | | | | +Epoch 7 || FLinear[dense]/linea | | | | +Epoch 7 || r_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.317 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[4]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[qu | | | | +Epoch 7 || ery]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.316 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[4]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[ke | | | | +Epoch 7 || y]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.322 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[4]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[va | | | | +Epoch 7 || lue]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.328 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[4]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfOutput[ou | | | | +Epoch 7 || tput]/NNCFLinear[den | | | | +Epoch 7 || se]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [3072, 768] | 0.590 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[4]/Be | | | | +Epoch 7 || rtIntermediate[inter | | | | +Epoch 7 || mediate]/NNCFLinear[ | | | | +Epoch 7 || dense]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 3072] | 0.609 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[4]/Be | | | | +Epoch 7 || rtOutput[output]/NNC | | | | +Epoch 7 || FLinear[dense]/linea | | | | +Epoch 7 || r_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.317 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[5]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[qu | | | | +Epoch 7 || ery]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.318 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[5]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[ke | | | | +Epoch 7 || y]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.326 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[5]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[va | | | | +Epoch 7 || lue]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.329 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[5]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfOutput[ou | | | | +Epoch 7 || tput]/NNCFLinear[den | | | | +Epoch 7 || se]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [3072, 768] | 0.589 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[5]/Be | | | | +Epoch 7 || rtIntermediate[inter | | | | +Epoch 7 || mediate]/NNCFLinear[ | | | | +Epoch 7 || dense]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 3072] | 0.608 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[5]/Be | | | | +Epoch 7 || rtOutput[output]/NNC | | | | +Epoch 7 || FLinear[dense]/linea | | | | +Epoch 7 || r_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.315 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[6]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[qu | | | | +Epoch 7 || ery]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.316 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[6]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[ke | | | | +Epoch 7 || y]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.326 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[6]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[va | | | | +Epoch 7 || lue]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.329 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[6]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfOutput[ou | | | | +Epoch 7 || tput]/NNCFLinear[den | | | | +Epoch 7 || se]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [3072, 768] | 0.588 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[6]/Be | | | | +Epoch 7 || rtIntermediate[inter | | | | +Epoch 7 || mediate]/NNCFLinear[ | | | | +Epoch 7 || dense]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 3072] | 0.604 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[6]/Be | | | | +Epoch 7 || rtOutput[output]/NNC | | | | +Epoch 7 || FLinear[dense]/linea | | | | +Epoch 7 || r_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.315 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[7]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[qu | | | | +Epoch 7 || ery]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.314 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[7]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[ke | | | | +Epoch 7 || y]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.322 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[7]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[va | | | | +Epoch 7 || lue]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.324 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[7]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfOutput[ou | | | | +Epoch 7 || tput]/NNCFLinear[den | | | | +Epoch 7 || se]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [3072, 768] | 0.584 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[7]/Be | | | | +Epoch 7 || rtIntermediate[inter | | | | +Epoch 7 || mediate]/NNCFLinear[ | | | | +Epoch 7 || dense]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 3072] | 0.597 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[7]/Be | | | | +Epoch 7 || rtOutput[output]/NNC | | | | +Epoch 7 || FLinear[dense]/linea | | | | +Epoch 7 || r_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.315 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[8]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[qu | | | | +Epoch 7 || ery]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.316 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[8]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[ke | | | | +Epoch 7 || y]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.318 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[8]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[va | | | | +Epoch 7 || lue]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.320 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[8]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfOutput[ou | | | | +Epoch 7 || tput]/NNCFLinear[den | | | | +Epoch 7 || se]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [3072, 768] | 0.581 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[8]/Be | | | | +Epoch 7 || rtIntermediate[inter | | | | +Epoch 7 || mediate]/NNCFLinear[ | | | | +Epoch 7 || dense]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 3072] | 0.593 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[8]/Be | | | | +Epoch 7 || rtOutput[output]/NNC | | | | +Epoch 7 || FLinear[dense]/linea | | | | +Epoch 7 || r_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.312 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[9]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[qu | | | | +Epoch 7 || ery]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.315 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[9]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[ke | | | | +Epoch 7 || y]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.316 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[9]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfAttention | | | | +Epoch 7 || [self]/NNCFLinear[va | | | | +Epoch 7 || lue]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.315 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[9]/Be | | | | +Epoch 7 || rtAttention[attentio | | | | +Epoch 7 || n]/BertSelfOutput[ou | | | | +Epoch 7 || tput]/NNCFLinear[den | | | | +Epoch 7 || se]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [3072, 768] | 0.585 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[9]/Be | | | | +Epoch 7 || rtIntermediate[inter | | | | +Epoch 7 || mediate]/NNCFLinear[ | | | | +Epoch 7 || dense]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 3072] | 0.595 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[9]/Be | | | | +Epoch 7 || rtOutput[output]/NNC | | | | +Epoch 7 || FLinear[dense]/linea | | | | +Epoch 7 || r_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.314 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[10]/B | | | | +Epoch 7 || ertAttention[attenti | | | | +Epoch 7 || on]/BertSelfAttentio | | | | +Epoch 7 || n[self]/NNCFLinear[q | | | | +Epoch 7 || uery]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.314 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[10]/B | | | | +Epoch 7 || ertAttention[attenti | | | | +Epoch 7 || on]/BertSelfAttentio | | | | +Epoch 7 || n[self]/NNCFLinear[k | | | | +Epoch 7 || ey]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.328 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[10]/B | | | | +Epoch 7 || ertAttention[attenti | | | | +Epoch 7 || on]/BertSelfAttentio | | | | +Epoch 7 || n[self]/NNCFLinear[v | | | | +Epoch 7 || alue]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.321 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[10]/B | | | | +Epoch 7 || ertAttention[attenti | | | | +Epoch 7 || on]/BertSelfOutput[o | | | | +Epoch 7 || utput]/NNCFLinear[de | | | | +Epoch 7 || nse]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [3072, 768] | 0.579 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[10]/B | | | | +Epoch 7 || ertIntermediate[inte | | | | +Epoch 7 || rmediate]/NNCFLinear | | | | +Epoch 7 || [dense]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 3072] | 0.587 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[10]/B | | | | +Epoch 7 || ertOutput[output]/NN | | | | +Epoch 7 || CFLinear[dense]/line | | | | +Epoch 7 || ar_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.316 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[11]/B | | | | +Epoch 7 || ertAttention[attenti | | | | +Epoch 7 || on]/BertSelfAttentio | | | | +Epoch 7 || n[self]/NNCFLinear[q | | | | +Epoch 7 || uery]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.313 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[11]/B | | | | +Epoch 7 || ertAttention[attenti | | | | +Epoch 7 || on]/BertSelfAttentio | | | | +Epoch 7 || n[self]/NNCFLinear[k | | | | +Epoch 7 || ey]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.319 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[11]/B | | | | +Epoch 7 || ertAttention[attenti | | | | +Epoch 7 || on]/BertSelfAttentio | | | | +Epoch 7 || n[self]/NNCFLinear[v | | | | +Epoch 7 || alue]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 768] | 0.314 | 0.694 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[11]/B | | | | +Epoch 7 || ertAttention[attenti | | | | +Epoch 7 || on]/BertSelfOutput[o | | | | +Epoch 7 || utput]/NNCFLinear[de | | | | +Epoch 7 || nse]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [3072, 768] | 0.578 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[11]/B | | | | +Epoch 7 || ertIntermediate[inte | | | | +Epoch 7 || rmediate]/NNCFLinear | | | | +Epoch 7 || [dense]/linear_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 || BertForSequenceClass | [768, 3072] | 0.581 | 2.778 | +Epoch 7 || ification/BertModel[ | | | | +Epoch 7 || bert]/BertEncoder[en | | | | +Epoch 7 || coder]/ModuleList[la | | | | +Epoch 7 || yer]/BertLayer[11]/B | | | | +Epoch 7 || ertOutput[output]/NN | | | | +Epoch 7 || CFLinear[dense]/line | | | | +Epoch 7 || ar_0 | | | | +Epoch 7 |+----------------------+----------------+----------------+---------------------+ +Epoch 7 | +Epoch 7 |Statistics of the magnitude sparsity algorithm: +Epoch 7 |+----------------------------------------------------------------------+-------+ +Epoch 7 || Statistic's name | Value | +Epoch 7 |+======================================================================+=======+ +Epoch 7 || A target level of the sparsity for the algorithm for the current | 0.501 | +Epoch 7 || epoch | | +Epoch 7 |+----------------------------------------------------------------------+-------+ +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || Layer's name | Sparsity threshold | +Epoch 7 |+=========================================================+====================+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 7 || near_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 7 || inear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 7 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 7 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 7 || near_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 7 || inear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 7 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 7 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 7 || near_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 7 || inear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 7 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 7 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 7 || near_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 7 || inear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 7 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 7 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 7 || near_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 7 || inear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 7 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 7 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 7 || near_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 7 || inear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 7 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 7 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 7 || near_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 7 || inear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 7 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 7 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 7 || near_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 7 || inear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 7 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 7 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 7 || near_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 7 || inear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 7 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 7 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 7 || near_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 7 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 7 || inear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 7 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 7 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 7 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 7 || /linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 7 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 7 || inear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 7 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 7 || /linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 7 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 7 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 7 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 7 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 7 || /linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 7 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 7 || inear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 7 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 7 || /linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 7 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 7 || linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 7 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +Epoch 7 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 7 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 7 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 7 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 8 |+--------------------------------+-------+ +Epoch 8 || Statistic's name | Value | +Epoch 8 |+================================+=======+ +Epoch 8 || Ratio of enabled quantizations | 100 | +Epoch 8 |+--------------------------------+-------+ +Epoch 8 | +Epoch 8 |Statistics of the quantization share: +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Statistic's name | Value | +Epoch 8 |+==================================+====================+ +Epoch 8 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 8 |+----------------------------------+--------------------+ +Epoch 8 | +Epoch 8 |Statistics of the bitwidth distribution: +Epoch 8 |+--------------+---------------------+--------------------+--------------------+ +Epoch 8 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 8 || | WQs | Placed AQs | Qs | +Epoch 8 |+==============+=====================+====================+====================+ +Epoch 8 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 8 || | | | 173) | +Epoch 8 |+--------------+---------------------+--------------------+--------------------+ +Epoch 8 | +Epoch 8 |Statistics of the sparsified model: +Epoch 8 |+-----------------------------------------+-------+ +Epoch 8 || Statistic's name | Value | +Epoch 8 |+=========================================+=======+ +Epoch 8 || Sparsity level of the whole model | 0.425 | +Epoch 8 |+-----------------------------------------+-------+ +Epoch 8 || Sparsity level of all sparsified layers | 0.548 | +Epoch 8 |+-----------------------------------------+-------+ +Epoch 8 | +Epoch 8 |Statistics by sparsified layers: +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 8 |+======================+================+================+=====================+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.360 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[0]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[qu | | | | +Epoch 8 || ery]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.367 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[0]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[ke | | | | +Epoch 8 || y]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.359 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[0]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[va | | | | +Epoch 8 || lue]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.372 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[0]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfOutput[ou | | | | +Epoch 8 || tput]/NNCFLinear[den | | | | +Epoch 8 || se]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [3072, 768] | 0.637 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[0]/Be | | | | +Epoch 8 || rtIntermediate[inter | | | | +Epoch 8 || mediate]/NNCFLinear[ | | | | +Epoch 8 || dense]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 3072] | 0.649 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[0]/Be | | | | +Epoch 8 || rtOutput[output]/NNC | | | | +Epoch 8 || FLinear[dense]/linea | | | | +Epoch 8 || r_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.356 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[1]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[qu | | | | +Epoch 8 || ery]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.360 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[1]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[ke | | | | +Epoch 8 || y]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.357 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[1]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[va | | | | +Epoch 8 || lue]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.371 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[1]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfOutput[ou | | | | +Epoch 8 || tput]/NNCFLinear[den | | | | +Epoch 8 || se]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [3072, 768] | 0.638 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[1]/Be | | | | +Epoch 8 || rtIntermediate[inter | | | | +Epoch 8 || mediate]/NNCFLinear[ | | | | +Epoch 8 || dense]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 3072] | 0.652 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[1]/Be | | | | +Epoch 8 || rtOutput[output]/NNC | | | | +Epoch 8 || FLinear[dense]/linea | | | | +Epoch 8 || r_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.373 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[2]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[qu | | | | +Epoch 8 || ery]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.373 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[2]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[ke | | | | +Epoch 8 || y]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.361 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[2]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[va | | | | +Epoch 8 || lue]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.367 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[2]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfOutput[ou | | | | +Epoch 8 || tput]/NNCFLinear[den | | | | +Epoch 8 || se]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [3072, 768] | 0.640 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[2]/Be | | | | +Epoch 8 || rtIntermediate[inter | | | | +Epoch 8 || mediate]/NNCFLinear[ | | | | +Epoch 8 || dense]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 3072] | 0.653 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[2]/Be | | | | +Epoch 8 || rtOutput[output]/NNC | | | | +Epoch 8 || FLinear[dense]/linea | | | | +Epoch 8 || r_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.354 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[3]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[qu | | | | +Epoch 8 || ery]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.356 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[3]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[ke | | | | +Epoch 8 || y]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.361 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[3]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[va | | | | +Epoch 8 || lue]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.365 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[3]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfOutput[ou | | | | +Epoch 8 || tput]/NNCFLinear[den | | | | +Epoch 8 || se]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [3072, 768] | 0.641 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[3]/Be | | | | +Epoch 8 || rtIntermediate[inter | | | | +Epoch 8 || mediate]/NNCFLinear[ | | | | +Epoch 8 || dense]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 3072] | 0.658 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[3]/Be | | | | +Epoch 8 || rtOutput[output]/NNC | | | | +Epoch 8 || FLinear[dense]/linea | | | | +Epoch 8 || r_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.352 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[4]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[qu | | | | +Epoch 8 || ery]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.352 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[4]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[ke | | | | +Epoch 8 || y]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.357 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[4]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[va | | | | +Epoch 8 || lue]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.364 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[4]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfOutput[ou | | | | +Epoch 8 || tput]/NNCFLinear[den | | | | +Epoch 8 || se]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [3072, 768] | 0.642 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[4]/Be | | | | +Epoch 8 || rtIntermediate[inter | | | | +Epoch 8 || mediate]/NNCFLinear[ | | | | +Epoch 8 || dense]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 3072] | 0.661 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[4]/Be | | | | +Epoch 8 || rtOutput[output]/NNC | | | | +Epoch 8 || FLinear[dense]/linea | | | | +Epoch 8 || r_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.352 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[5]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[qu | | | | +Epoch 8 || ery]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.353 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[5]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[ke | | | | +Epoch 8 || y]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.362 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[5]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[va | | | | +Epoch 8 || lue]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.364 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[5]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfOutput[ou | | | | +Epoch 8 || tput]/NNCFLinear[den | | | | +Epoch 8 || se]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [3072, 768] | 0.641 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[5]/Be | | | | +Epoch 8 || rtIntermediate[inter | | | | +Epoch 8 || mediate]/NNCFLinear[ | | | | +Epoch 8 || dense]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 3072] | 0.660 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[5]/Be | | | | +Epoch 8 || rtOutput[output]/NNC | | | | +Epoch 8 || FLinear[dense]/linea | | | | +Epoch 8 || r_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.350 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[6]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[qu | | | | +Epoch 8 || ery]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.351 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[6]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[ke | | | | +Epoch 8 || y]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.362 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[6]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[va | | | | +Epoch 8 || lue]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.365 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[6]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfOutput[ou | | | | +Epoch 8 || tput]/NNCFLinear[den | | | | +Epoch 8 || se]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [3072, 768] | 0.640 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[6]/Be | | | | +Epoch 8 || rtIntermediate[inter | | | | +Epoch 8 || mediate]/NNCFLinear[ | | | | +Epoch 8 || dense]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 3072] | 0.656 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[6]/Be | | | | +Epoch 8 || rtOutput[output]/NNC | | | | +Epoch 8 || FLinear[dense]/linea | | | | +Epoch 8 || r_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.350 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[7]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[qu | | | | +Epoch 8 || ery]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.349 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[7]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[ke | | | | +Epoch 8 || y]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.357 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[7]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[va | | | | +Epoch 8 || lue]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.361 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[7]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfOutput[ou | | | | +Epoch 8 || tput]/NNCFLinear[den | | | | +Epoch 8 || se]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [3072, 768] | 0.637 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[7]/Be | | | | +Epoch 8 || rtIntermediate[inter | | | | +Epoch 8 || mediate]/NNCFLinear[ | | | | +Epoch 8 || dense]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 3072] | 0.650 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[7]/Be | | | | +Epoch 8 || rtOutput[output]/NNC | | | | +Epoch 8 || FLinear[dense]/linea | | | | +Epoch 8 || r_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.350 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[8]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[qu | | | | +Epoch 8 || ery]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.351 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[8]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[ke | | | | +Epoch 8 || y]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.353 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[8]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[va | | | | +Epoch 8 || lue]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.356 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[8]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfOutput[ou | | | | +Epoch 8 || tput]/NNCFLinear[den | | | | +Epoch 8 || se]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [3072, 768] | 0.634 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[8]/Be | | | | +Epoch 8 || rtIntermediate[inter | | | | +Epoch 8 || mediate]/NNCFLinear[ | | | | +Epoch 8 || dense]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 3072] | 0.645 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[8]/Be | | | | +Epoch 8 || rtOutput[output]/NNC | | | | +Epoch 8 || FLinear[dense]/linea | | | | +Epoch 8 || r_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.348 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[9]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[qu | | | | +Epoch 8 || ery]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.350 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[9]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[ke | | | | +Epoch 8 || y]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.351 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[9]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfAttention | | | | +Epoch 8 || [self]/NNCFLinear[va | | | | +Epoch 8 || lue]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.351 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[9]/Be | | | | +Epoch 8 || rtAttention[attentio | | | | +Epoch 8 || n]/BertSelfOutput[ou | | | | +Epoch 8 || tput]/NNCFLinear[den | | | | +Epoch 8 || se]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [3072, 768] | 0.638 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[9]/Be | | | | +Epoch 8 || rtIntermediate[inter | | | | +Epoch 8 || mediate]/NNCFLinear[ | | | | +Epoch 8 || dense]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 3072] | 0.648 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[9]/Be | | | | +Epoch 8 || rtOutput[output]/NNC | | | | +Epoch 8 || FLinear[dense]/linea | | | | +Epoch 8 || r_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.349 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[10]/B | | | | +Epoch 8 || ertAttention[attenti | | | | +Epoch 8 || on]/BertSelfAttentio | | | | +Epoch 8 || n[self]/NNCFLinear[q | | | | +Epoch 8 || uery]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.350 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[10]/B | | | | +Epoch 8 || ertAttention[attenti | | | | +Epoch 8 || on]/BertSelfAttentio | | | | +Epoch 8 || n[self]/NNCFLinear[k | | | | +Epoch 8 || ey]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.363 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[10]/B | | | | +Epoch 8 || ertAttention[attenti | | | | +Epoch 8 || on]/BertSelfAttentio | | | | +Epoch 8 || n[self]/NNCFLinear[v | | | | +Epoch 8 || alue]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.357 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[10]/B | | | | +Epoch 8 || ertAttention[attenti | | | | +Epoch 8 || on]/BertSelfOutput[o | | | | +Epoch 8 || utput]/NNCFLinear[de | | | | +Epoch 8 || nse]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [3072, 768] | 0.632 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[10]/B | | | | +Epoch 8 || ertIntermediate[inte | | | | +Epoch 8 || rmediate]/NNCFLinear | | | | +Epoch 8 || [dense]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 3072] | 0.640 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[10]/B | | | | +Epoch 8 || ertOutput[output]/NN | | | | +Epoch 8 || CFLinear[dense]/line | | | | +Epoch 8 || ar_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.352 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[11]/B | | | | +Epoch 8 || ertAttention[attenti | | | | +Epoch 8 || on]/BertSelfAttentio | | | | +Epoch 8 || n[self]/NNCFLinear[q | | | | +Epoch 8 || uery]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.349 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[11]/B | | | | +Epoch 8 || ertAttention[attenti | | | | +Epoch 8 || on]/BertSelfAttentio | | | | +Epoch 8 || n[self]/NNCFLinear[k | | | | +Epoch 8 || ey]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.355 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[11]/B | | | | +Epoch 8 || ertAttention[attenti | | | | +Epoch 8 || on]/BertSelfAttentio | | | | +Epoch 8 || n[self]/NNCFLinear[v | | | | +Epoch 8 || alue]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 768] | 0.349 | 0.694 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[11]/B | | | | +Epoch 8 || ertAttention[attenti | | | | +Epoch 8 || on]/BertSelfOutput[o | | | | +Epoch 8 || utput]/NNCFLinear[de | | | | +Epoch 8 || nse]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [3072, 768] | 0.631 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[11]/B | | | | +Epoch 8 || ertIntermediate[inte | | | | +Epoch 8 || rmediate]/NNCFLinear | | | | +Epoch 8 || [dense]/linear_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 || BertForSequenceClass | [768, 3072] | 0.634 | 2.778 | +Epoch 8 || ification/BertModel[ | | | | +Epoch 8 || bert]/BertEncoder[en | | | | +Epoch 8 || coder]/ModuleList[la | | | | +Epoch 8 || yer]/BertLayer[11]/B | | | | +Epoch 8 || ertOutput[output]/NN | | | | +Epoch 8 || CFLinear[dense]/line | | | | +Epoch 8 || ar_0 | | | | +Epoch 8 |+----------------------+----------------+----------------+---------------------+ +Epoch 8 | +Epoch 8 |Statistics of the magnitude sparsity algorithm: +Epoch 8 |+----------------------------------------------------------------------+-------+ +Epoch 8 || Statistic's name | Value | +Epoch 8 |+======================================================================+=======+ +Epoch 8 || A target level of the sparsity for the algorithm for the current | 0.548 | +Epoch 8 || epoch | | +Epoch 8 |+----------------------------------------------------------------------+-------+ +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || Layer's name | Sparsity threshold | +Epoch 8 |+=========================================================+====================+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 8 || near_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 8 || inear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 8 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 8 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 8 || near_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 8 || inear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 8 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 8 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 8 || near_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 8 || inear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 8 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 8 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 8 || near_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 8 || inear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 8 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 8 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 8 || near_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 8 || inear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 8 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 8 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 8 || near_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 8 || inear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 8 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 8 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 8 || near_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 8 || inear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 8 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 8 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 8 || near_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 8 || inear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 8 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 8 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 8 || near_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 8 || inear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 8 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 8 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 8 || near_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 8 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 8 || inear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 8 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 8 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 8 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 8 || /linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 8 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 8 || inear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 8 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 8 || /linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 8 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 8 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 8 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 8 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 8 || /linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 8 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 8 || inear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 8 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 8 || /linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 8 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 8 || linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 8 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +Epoch 8 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 8 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 8 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 8 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 9 |+--------------------------------+-------+ +Epoch 9 || Statistic's name | Value | +Epoch 9 |+================================+=======+ +Epoch 9 || Ratio of enabled quantizations | 100 | +Epoch 9 |+--------------------------------+-------+ +Epoch 9 | +Epoch 9 |Statistics of the quantization share: +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Statistic's name | Value | +Epoch 9 |+==================================+====================+ +Epoch 9 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 9 |+----------------------------------+--------------------+ +Epoch 9 | +Epoch 9 |Statistics of the bitwidth distribution: +Epoch 9 |+--------------+---------------------+--------------------+--------------------+ +Epoch 9 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 9 || | WQs | Placed AQs | Qs | +Epoch 9 |+==============+=====================+====================+====================+ +Epoch 9 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 9 || | | | 173) | +Epoch 9 |+--------------+---------------------+--------------------+--------------------+ +Epoch 9 | +Epoch 9 |Statistics of the sparsified model: +Epoch 9 |+-----------------------------------------+-------+ +Epoch 9 || Statistic's name | Value | +Epoch 9 |+=========================================+=======+ +Epoch 9 || Sparsity level of the whole model | 0.458 | +Epoch 9 |+-----------------------------------------+-------+ +Epoch 9 || Sparsity level of all sparsified layers | 0.590 | +Epoch 9 |+-----------------------------------------+-------+ +Epoch 9 | +Epoch 9 |Statistics by sparsified layers: +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 9 |+======================+================+================+=====================+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.402 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[0]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[qu | | | | +Epoch 9 || ery]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.410 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[0]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[ke | | | | +Epoch 9 || y]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.412 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[0]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[va | | | | +Epoch 9 || lue]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.427 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[0]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfOutput[ou | | | | +Epoch 9 || tput]/NNCFLinear[den | | | | +Epoch 9 || se]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [3072, 768] | 0.680 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[0]/Be | | | | +Epoch 9 || rtIntermediate[inter | | | | +Epoch 9 || mediate]/NNCFLinear[ | | | | +Epoch 9 || dense]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 3072] | 0.692 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[0]/Be | | | | +Epoch 9 || rtOutput[output]/NNC | | | | +Epoch 9 || FLinear[dense]/linea | | | | +Epoch 9 || r_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.398 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[1]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[qu | | | | +Epoch 9 || ery]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.403 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[1]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[ke | | | | +Epoch 9 || y]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.411 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[1]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[va | | | | +Epoch 9 || lue]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.428 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[1]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfOutput[ou | | | | +Epoch 9 || tput]/NNCFLinear[den | | | | +Epoch 9 || se]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [3072, 768] | 0.680 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[1]/Be | | | | +Epoch 9 || rtIntermediate[inter | | | | +Epoch 9 || mediate]/NNCFLinear[ | | | | +Epoch 9 || dense]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 3072] | 0.694 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[1]/Be | | | | +Epoch 9 || rtOutput[output]/NNC | | | | +Epoch 9 || FLinear[dense]/linea | | | | +Epoch 9 || r_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.414 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[2]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[qu | | | | +Epoch 9 || ery]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.415 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[2]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[ke | | | | +Epoch 9 || y]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.416 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[2]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[va | | | | +Epoch 9 || lue]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.425 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[2]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfOutput[ou | | | | +Epoch 9 || tput]/NNCFLinear[den | | | | +Epoch 9 || se]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [3072, 768] | 0.682 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[2]/Be | | | | +Epoch 9 || rtIntermediate[inter | | | | +Epoch 9 || mediate]/NNCFLinear[ | | | | +Epoch 9 || dense]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 3072] | 0.694 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[2]/Be | | | | +Epoch 9 || rtOutput[output]/NNC | | | | +Epoch 9 || FLinear[dense]/linea | | | | +Epoch 9 || r_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.397 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[3]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[qu | | | | +Epoch 9 || ery]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.400 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[3]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[ke | | | | +Epoch 9 || y]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.412 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[3]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[va | | | | +Epoch 9 || lue]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.418 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[3]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfOutput[ou | | | | +Epoch 9 || tput]/NNCFLinear[den | | | | +Epoch 9 || se]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [3072, 768] | 0.682 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[3]/Be | | | | +Epoch 9 || rtIntermediate[inter | | | | +Epoch 9 || mediate]/NNCFLinear[ | | | | +Epoch 9 || dense]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 3072] | 0.698 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[3]/Be | | | | +Epoch 9 || rtOutput[output]/NNC | | | | +Epoch 9 || FLinear[dense]/linea | | | | +Epoch 9 || r_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.394 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[4]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[qu | | | | +Epoch 9 || ery]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.395 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[4]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[ke | | | | +Epoch 9 || y]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.402 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[4]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[va | | | | +Epoch 9 || lue]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.413 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[4]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfOutput[ou | | | | +Epoch 9 || tput]/NNCFLinear[den | | | | +Epoch 9 || se]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [3072, 768] | 0.683 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[4]/Be | | | | +Epoch 9 || rtIntermediate[inter | | | | +Epoch 9 || mediate]/NNCFLinear[ | | | | +Epoch 9 || dense]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 3072] | 0.700 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[4]/Be | | | | +Epoch 9 || rtOutput[output]/NNC | | | | +Epoch 9 || FLinear[dense]/linea | | | | +Epoch 9 || r_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.394 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[5]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[qu | | | | +Epoch 9 || ery]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.395 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[5]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[ke | | | | +Epoch 9 || y]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.408 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[5]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[va | | | | +Epoch 9 || lue]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.413 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[5]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfOutput[ou | | | | +Epoch 9 || tput]/NNCFLinear[den | | | | +Epoch 9 || se]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [3072, 768] | 0.682 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[5]/Be | | | | +Epoch 9 || rtIntermediate[inter | | | | +Epoch 9 || mediate]/NNCFLinear[ | | | | +Epoch 9 || dense]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 3072] | 0.700 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[5]/Be | | | | +Epoch 9 || rtOutput[output]/NNC | | | | +Epoch 9 || FLinear[dense]/linea | | | | +Epoch 9 || r_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.392 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[6]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[qu | | | | +Epoch 9 || ery]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.392 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[6]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[ke | | | | +Epoch 9 || y]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.407 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[6]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[va | | | | +Epoch 9 || lue]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.413 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[6]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfOutput[ou | | | | +Epoch 9 || tput]/NNCFLinear[den | | | | +Epoch 9 || se]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [3072, 768] | 0.681 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[6]/Be | | | | +Epoch 9 || rtIntermediate[inter | | | | +Epoch 9 || mediate]/NNCFLinear[ | | | | +Epoch 9 || dense]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 3072] | 0.696 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[6]/Be | | | | +Epoch 9 || rtOutput[output]/NNC | | | | +Epoch 9 || FLinear[dense]/linea | | | | +Epoch 9 || r_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.391 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[7]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[qu | | | | +Epoch 9 || ery]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.391 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[7]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[ke | | | | +Epoch 9 || y]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.403 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[7]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[va | | | | +Epoch 9 || lue]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.407 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[7]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfOutput[ou | | | | +Epoch 9 || tput]/NNCFLinear[den | | | | +Epoch 9 || se]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [3072, 768] | 0.678 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[7]/Be | | | | +Epoch 9 || rtIntermediate[inter | | | | +Epoch 9 || mediate]/NNCFLinear[ | | | | +Epoch 9 || dense]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 3072] | 0.690 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[7]/Be | | | | +Epoch 9 || rtOutput[output]/NNC | | | | +Epoch 9 || FLinear[dense]/linea | | | | +Epoch 9 || r_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.391 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[8]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[qu | | | | +Epoch 9 || ery]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.391 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[8]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[ke | | | | +Epoch 9 || y]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.395 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[8]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[va | | | | +Epoch 9 || lue]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.399 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[8]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfOutput[ou | | | | +Epoch 9 || tput]/NNCFLinear[den | | | | +Epoch 9 || se]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [3072, 768] | 0.675 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[8]/Be | | | | +Epoch 9 || rtIntermediate[inter | | | | +Epoch 9 || mediate]/NNCFLinear[ | | | | +Epoch 9 || dense]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 3072] | 0.686 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[8]/Be | | | | +Epoch 9 || rtOutput[output]/NNC | | | | +Epoch 9 || FLinear[dense]/linea | | | | +Epoch 9 || r_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.386 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[9]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[qu | | | | +Epoch 9 || ery]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.388 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[9]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[ke | | | | +Epoch 9 || y]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.392 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[9]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfAttention | | | | +Epoch 9 || [self]/NNCFLinear[va | | | | +Epoch 9 || lue]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.393 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[9]/Be | | | | +Epoch 9 || rtAttention[attentio | | | | +Epoch 9 || n]/BertSelfOutput[ou | | | | +Epoch 9 || tput]/NNCFLinear[den | | | | +Epoch 9 || se]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [3072, 768] | 0.679 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[9]/Be | | | | +Epoch 9 || rtIntermediate[inter | | | | +Epoch 9 || mediate]/NNCFLinear[ | | | | +Epoch 9 || dense]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 3072] | 0.689 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[9]/Be | | | | +Epoch 9 || rtOutput[output]/NNC | | | | +Epoch 9 || FLinear[dense]/linea | | | | +Epoch 9 || r_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.385 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[10]/B | | | | +Epoch 9 || ertAttention[attenti | | | | +Epoch 9 || on]/BertSelfAttentio | | | | +Epoch 9 || n[self]/NNCFLinear[q | | | | +Epoch 9 || uery]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.387 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[10]/B | | | | +Epoch 9 || ertAttention[attenti | | | | +Epoch 9 || on]/BertSelfAttentio | | | | +Epoch 9 || n[self]/NNCFLinear[k | | | | +Epoch 9 || ey]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.402 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[10]/B | | | | +Epoch 9 || ertAttention[attenti | | | | +Epoch 9 || on]/BertSelfAttentio | | | | +Epoch 9 || n[self]/NNCFLinear[v | | | | +Epoch 9 || alue]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.398 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[10]/B | | | | +Epoch 9 || ertAttention[attenti | | | | +Epoch 9 || on]/BertSelfOutput[o | | | | +Epoch 9 || utput]/NNCFLinear[de | | | | +Epoch 9 || nse]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [3072, 768] | 0.673 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[10]/B | | | | +Epoch 9 || ertIntermediate[inte | | | | +Epoch 9 || rmediate]/NNCFLinear | | | | +Epoch 9 || [dense]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 3072] | 0.681 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[10]/B | | | | +Epoch 9 || ertOutput[output]/NN | | | | +Epoch 9 || CFLinear[dense]/line | | | | +Epoch 9 || ar_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.387 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[11]/B | | | | +Epoch 9 || ertAttention[attenti | | | | +Epoch 9 || on]/BertSelfAttentio | | | | +Epoch 9 || n[self]/NNCFLinear[q | | | | +Epoch 9 || uery]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.384 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[11]/B | | | | +Epoch 9 || ertAttention[attenti | | | | +Epoch 9 || on]/BertSelfAttentio | | | | +Epoch 9 || n[self]/NNCFLinear[k | | | | +Epoch 9 || ey]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.390 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[11]/B | | | | +Epoch 9 || ertAttention[attenti | | | | +Epoch 9 || on]/BertSelfAttentio | | | | +Epoch 9 || n[self]/NNCFLinear[v | | | | +Epoch 9 || alue]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 768] | 0.386 | 0.694 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[11]/B | | | | +Epoch 9 || ertAttention[attenti | | | | +Epoch 9 || on]/BertSelfOutput[o | | | | +Epoch 9 || utput]/NNCFLinear[de | | | | +Epoch 9 || nse]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [3072, 768] | 0.671 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[11]/B | | | | +Epoch 9 || ertIntermediate[inte | | | | +Epoch 9 || rmediate]/NNCFLinear | | | | +Epoch 9 || [dense]/linear_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 || BertForSequenceClass | [768, 3072] | 0.673 | 2.778 | +Epoch 9 || ification/BertModel[ | | | | +Epoch 9 || bert]/BertEncoder[en | | | | +Epoch 9 || coder]/ModuleList[la | | | | +Epoch 9 || yer]/BertLayer[11]/B | | | | +Epoch 9 || ertOutput[output]/NN | | | | +Epoch 9 || CFLinear[dense]/line | | | | +Epoch 9 || ar_0 | | | | +Epoch 9 |+----------------------+----------------+----------------+---------------------+ +Epoch 9 | +Epoch 9 |Statistics of the magnitude sparsity algorithm: +Epoch 9 |+----------------------------------------------------------------------+-------+ +Epoch 9 || Statistic's name | Value | +Epoch 9 |+======================================================================+=======+ +Epoch 9 || A target level of the sparsity for the algorithm for the current | 0.590 | +Epoch 9 || epoch | | +Epoch 9 |+----------------------------------------------------------------------+-------+ +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || Layer's name | Sparsity threshold | +Epoch 9 |+=========================================================+====================+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 9 || near_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 9 || inear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 9 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 9 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 9 || near_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 9 || inear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 9 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 9 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 9 || near_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 9 || inear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 9 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 9 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 9 || near_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 9 || inear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 9 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 9 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 9 || near_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 9 || inear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 9 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 9 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 9 || near_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 9 || inear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 9 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 9 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 9 || near_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 9 || inear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 9 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 9 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 9 || near_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 9 || inear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 9 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 9 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 9 || near_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 9 || inear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 9 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 9 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 9 || near_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 9 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 9 || inear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 9 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 9 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 9 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 9 || /linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 9 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 9 || inear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 9 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 9 || /linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 9 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 9 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 9 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 9 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 9 || /linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 9 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 9 || inear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 9 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 9 || /linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 9 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 9 || linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 9 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +Epoch 9 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 9 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 9 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 9 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 10 |+--------------------------------+-------+ +Epoch 10 || Statistic's name | Value | +Epoch 10 |+================================+=======+ +Epoch 10 || Ratio of enabled quantizations | 100 | +Epoch 10 |+--------------------------------+-------+ +Epoch 10 | +Epoch 10 |Statistics of the quantization share: +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Statistic's name | Value | +Epoch 10 |+==================================+====================+ +Epoch 10 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 10 |+----------------------------------+--------------------+ +Epoch 10 | +Epoch 10 |Statistics of the bitwidth distribution: +Epoch 10 |+--------------+---------------------+--------------------+--------------------+ +Epoch 10 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 10 || | WQs | Placed AQs | Qs | +Epoch 10 |+==============+=====================+====================+====================+ +Epoch 10 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 10 || | | | 173) | +Epoch 10 |+--------------+---------------------+--------------------+--------------------+ +Epoch 10 | +Epoch 10 |Statistics of the sparsified model: +Epoch 10 |+-----------------------------------------+-------+ +Epoch 10 || Statistic's name | Value | +Epoch 10 |+=========================================+=======+ +Epoch 10 || Sparsity level of the whole model | 0.487 | +Epoch 10 |+-----------------------------------------+-------+ +Epoch 10 || Sparsity level of all sparsified layers | 0.627 | +Epoch 10 |+-----------------------------------------+-------+ +Epoch 10 | +Epoch 10 |Statistics by sparsified layers: +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 10 |+======================+================+================+=====================+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.432 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[0]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[qu | | | | +Epoch 10 || ery]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.440 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[0]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[ke | | | | +Epoch 10 || y]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.442 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[0]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[va | | | | +Epoch 10 || lue]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.458 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[0]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfOutput[ou | | | | +Epoch 10 || tput]/NNCFLinear[den | | | | +Epoch 10 || se]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [3072, 768] | 0.719 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[0]/Be | | | | +Epoch 10 || rtIntermediate[inter | | | | +Epoch 10 || mediate]/NNCFLinear[ | | | | +Epoch 10 || dense]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 3072] | 0.731 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[0]/Be | | | | +Epoch 10 || rtOutput[output]/NNC | | | | +Epoch 10 || FLinear[dense]/linea | | | | +Epoch 10 || r_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.429 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[1]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[qu | | | | +Epoch 10 || ery]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.433 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[1]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[ke | | | | +Epoch 10 || y]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.441 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[1]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[va | | | | +Epoch 10 || lue]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.459 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[1]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfOutput[ou | | | | +Epoch 10 || tput]/NNCFLinear[den | | | | +Epoch 10 || se]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [3072, 768] | 0.720 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[1]/Be | | | | +Epoch 10 || rtIntermediate[inter | | | | +Epoch 10 || mediate]/NNCFLinear[ | | | | +Epoch 10 || dense]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 3072] | 0.733 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[1]/Be | | | | +Epoch 10 || rtOutput[output]/NNC | | | | +Epoch 10 || FLinear[dense]/linea | | | | +Epoch 10 || r_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.444 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[2]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[qu | | | | +Epoch 10 || ery]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.445 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[2]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[ke | | | | +Epoch 10 || y]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.446 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[2]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[va | | | | +Epoch 10 || lue]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.456 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[2]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfOutput[ou | | | | +Epoch 10 || tput]/NNCFLinear[den | | | | +Epoch 10 || se]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [3072, 768] | 0.721 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[2]/Be | | | | +Epoch 10 || rtIntermediate[inter | | | | +Epoch 10 || mediate]/NNCFLinear[ | | | | +Epoch 10 || dense]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 3072] | 0.733 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[2]/Be | | | | +Epoch 10 || rtOutput[output]/NNC | | | | +Epoch 10 || FLinear[dense]/linea | | | | +Epoch 10 || r_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.426 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[3]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[qu | | | | +Epoch 10 || ery]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.429 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[3]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[ke | | | | +Epoch 10 || y]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.441 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[3]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[va | | | | +Epoch 10 || lue]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.449 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[3]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfOutput[ou | | | | +Epoch 10 || tput]/NNCFLinear[den | | | | +Epoch 10 || se]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [3072, 768] | 0.722 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[3]/Be | | | | +Epoch 10 || rtIntermediate[inter | | | | +Epoch 10 || mediate]/NNCFLinear[ | | | | +Epoch 10 || dense]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 3072] | 0.737 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[3]/Be | | | | +Epoch 10 || rtOutput[output]/NNC | | | | +Epoch 10 || FLinear[dense]/linea | | | | +Epoch 10 || r_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.423 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[4]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[qu | | | | +Epoch 10 || ery]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.424 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[4]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[ke | | | | +Epoch 10 || y]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.432 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[4]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[va | | | | +Epoch 10 || lue]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.443 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[4]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfOutput[ou | | | | +Epoch 10 || tput]/NNCFLinear[den | | | | +Epoch 10 || se]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [3072, 768] | 0.723 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[4]/Be | | | | +Epoch 10 || rtIntermediate[inter | | | | +Epoch 10 || mediate]/NNCFLinear[ | | | | +Epoch 10 || dense]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 3072] | 0.740 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[4]/Be | | | | +Epoch 10 || rtOutput[output]/NNC | | | | +Epoch 10 || FLinear[dense]/linea | | | | +Epoch 10 || r_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.422 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[5]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[qu | | | | +Epoch 10 || ery]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.424 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[5]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[ke | | | | +Epoch 10 || y]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.438 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[5]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[va | | | | +Epoch 10 || lue]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.443 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[5]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfOutput[ou | | | | +Epoch 10 || tput]/NNCFLinear[den | | | | +Epoch 10 || se]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [3072, 768] | 0.723 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[5]/Be | | | | +Epoch 10 || rtIntermediate[inter | | | | +Epoch 10 || mediate]/NNCFLinear[ | | | | +Epoch 10 || dense]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 3072] | 0.739 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[5]/Be | | | | +Epoch 10 || rtOutput[output]/NNC | | | | +Epoch 10 || FLinear[dense]/linea | | | | +Epoch 10 || r_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.421 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[6]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[qu | | | | +Epoch 10 || ery]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.422 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[6]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[ke | | | | +Epoch 10 || y]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.437 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[6]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[va | | | | +Epoch 10 || lue]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.443 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[6]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfOutput[ou | | | | +Epoch 10 || tput]/NNCFLinear[den | | | | +Epoch 10 || se]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [3072, 768] | 0.721 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[6]/Be | | | | +Epoch 10 || rtIntermediate[inter | | | | +Epoch 10 || mediate]/NNCFLinear[ | | | | +Epoch 10 || dense]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 3072] | 0.735 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[6]/Be | | | | +Epoch 10 || rtOutput[output]/NNC | | | | +Epoch 10 || FLinear[dense]/linea | | | | +Epoch 10 || r_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.420 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[7]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[qu | | | | +Epoch 10 || ery]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.420 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[7]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[ke | | | | +Epoch 10 || y]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.433 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[7]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[va | | | | +Epoch 10 || lue]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.438 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[7]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfOutput[ou | | | | +Epoch 10 || tput]/NNCFLinear[den | | | | +Epoch 10 || se]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [3072, 768] | 0.718 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[7]/Be | | | | +Epoch 10 || rtIntermediate[inter | | | | +Epoch 10 || mediate]/NNCFLinear[ | | | | +Epoch 10 || dense]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 3072] | 0.730 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[7]/Be | | | | +Epoch 10 || rtOutput[output]/NNC | | | | +Epoch 10 || FLinear[dense]/linea | | | | +Epoch 10 || r_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.420 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[8]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[qu | | | | +Epoch 10 || ery]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.420 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[8]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[ke | | | | +Epoch 10 || y]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.425 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[8]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[va | | | | +Epoch 10 || lue]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.429 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[8]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfOutput[ou | | | | +Epoch 10 || tput]/NNCFLinear[den | | | | +Epoch 10 || se]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [3072, 768] | 0.716 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[8]/Be | | | | +Epoch 10 || rtIntermediate[inter | | | | +Epoch 10 || mediate]/NNCFLinear[ | | | | +Epoch 10 || dense]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 3072] | 0.727 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[8]/Be | | | | +Epoch 10 || rtOutput[output]/NNC | | | | +Epoch 10 || FLinear[dense]/linea | | | | +Epoch 10 || r_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.415 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[9]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[qu | | | | +Epoch 10 || ery]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.417 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[9]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[ke | | | | +Epoch 10 || y]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.423 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[9]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfAttention | | | | +Epoch 10 || [self]/NNCFLinear[va | | | | +Epoch 10 || lue]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.424 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[9]/Be | | | | +Epoch 10 || rtAttention[attentio | | | | +Epoch 10 || n]/BertSelfOutput[ou | | | | +Epoch 10 || tput]/NNCFLinear[den | | | | +Epoch 10 || se]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [3072, 768] | 0.721 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[9]/Be | | | | +Epoch 10 || rtIntermediate[inter | | | | +Epoch 10 || mediate]/NNCFLinear[ | | | | +Epoch 10 || dense]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 3072] | 0.730 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[9]/Be | | | | +Epoch 10 || rtOutput[output]/NNC | | | | +Epoch 10 || FLinear[dense]/linea | | | | +Epoch 10 || r_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.417 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[10]/B | | | | +Epoch 10 || ertAttention[attenti | | | | +Epoch 10 || on]/BertSelfAttentio | | | | +Epoch 10 || n[self]/NNCFLinear[q | | | | +Epoch 10 || uery]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.418 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[10]/B | | | | +Epoch 10 || ertAttention[attenti | | | | +Epoch 10 || on]/BertSelfAttentio | | | | +Epoch 10 || n[self]/NNCFLinear[k | | | | +Epoch 10 || ey]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.434 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[10]/B | | | | +Epoch 10 || ertAttention[attenti | | | | +Epoch 10 || on]/BertSelfAttentio | | | | +Epoch 10 || n[self]/NNCFLinear[v | | | | +Epoch 10 || alue]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.429 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[10]/B | | | | +Epoch 10 || ertAttention[attenti | | | | +Epoch 10 || on]/BertSelfOutput[o | | | | +Epoch 10 || utput]/NNCFLinear[de | | | | +Epoch 10 || nse]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [3072, 768] | 0.715 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[10]/B | | | | +Epoch 10 || ertIntermediate[inte | | | | +Epoch 10 || rmediate]/NNCFLinear | | | | +Epoch 10 || [dense]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 3072] | 0.722 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[10]/B | | | | +Epoch 10 || ertOutput[output]/NN | | | | +Epoch 10 || CFLinear[dense]/line | | | | +Epoch 10 || ar_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.417 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[11]/B | | | | +Epoch 10 || ertAttention[attenti | | | | +Epoch 10 || on]/BertSelfAttentio | | | | +Epoch 10 || n[self]/NNCFLinear[q | | | | +Epoch 10 || uery]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.414 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[11]/B | | | | +Epoch 10 || ertAttention[attenti | | | | +Epoch 10 || on]/BertSelfAttentio | | | | +Epoch 10 || n[self]/NNCFLinear[k | | | | +Epoch 10 || ey]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.421 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[11]/B | | | | +Epoch 10 || ertAttention[attenti | | | | +Epoch 10 || on]/BertSelfAttentio | | | | +Epoch 10 || n[self]/NNCFLinear[v | | | | +Epoch 10 || alue]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 768] | 0.416 | 0.694 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[11]/B | | | | +Epoch 10 || ertAttention[attenti | | | | +Epoch 10 || on]/BertSelfOutput[o | | | | +Epoch 10 || utput]/NNCFLinear[de | | | | +Epoch 10 || nse]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [3072, 768] | 0.713 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[11]/B | | | | +Epoch 10 || ertIntermediate[inte | | | | +Epoch 10 || rmediate]/NNCFLinear | | | | +Epoch 10 || [dense]/linear_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 || BertForSequenceClass | [768, 3072] | 0.716 | 2.778 | +Epoch 10 || ification/BertModel[ | | | | +Epoch 10 || bert]/BertEncoder[en | | | | +Epoch 10 || coder]/ModuleList[la | | | | +Epoch 10 || yer]/BertLayer[11]/B | | | | +Epoch 10 || ertOutput[output]/NN | | | | +Epoch 10 || CFLinear[dense]/line | | | | +Epoch 10 || ar_0 | | | | +Epoch 10 |+----------------------+----------------+----------------+---------------------+ +Epoch 10 | +Epoch 10 |Statistics of the magnitude sparsity algorithm: +Epoch 10 |+----------------------------------------------------------------------+-------+ +Epoch 10 || Statistic's name | Value | +Epoch 10 |+======================================================================+=======+ +Epoch 10 || A target level of the sparsity for the algorithm for the current | 0.627 | +Epoch 10 || epoch | | +Epoch 10 |+----------------------------------------------------------------------+-------+ +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || Layer's name | Sparsity threshold | +Epoch 10 |+=========================================================+====================+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 10 || near_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 10 || inear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 10 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 10 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 10 || near_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 10 || inear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 10 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 10 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 10 || near_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 10 || inear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 10 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 10 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 10 || near_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 10 || inear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 10 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 10 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 10 || near_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 10 || inear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 10 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 10 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 10 || near_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 10 || inear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 10 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 10 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 10 || near_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 10 || inear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 10 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 10 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 10 || near_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 10 || inear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 10 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 10 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 10 || near_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 10 || inear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 10 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 10 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 10 || near_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 10 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 10 || inear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 10 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 10 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 10 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 10 || /linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 10 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 10 || inear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 10 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 10 || /linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 10 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 10 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 10 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 10 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 10 || /linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 10 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 10 || inear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 10 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 10 || /linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 10 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 10 || linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 10 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +Epoch 10 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 10 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 10 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 10 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 11 |+--------------------------------+-------+ +Epoch 11 || Statistic's name | Value | +Epoch 11 |+================================+=======+ +Epoch 11 || Ratio of enabled quantizations | 100 | +Epoch 11 |+--------------------------------+-------+ +Epoch 11 | +Epoch 11 |Statistics of the quantization share: +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Statistic's name | Value | +Epoch 11 |+==================================+====================+ +Epoch 11 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 11 |+----------------------------------+--------------------+ +Epoch 11 | +Epoch 11 |Statistics of the bitwidth distribution: +Epoch 11 |+--------------+---------------------+--------------------+--------------------+ +Epoch 11 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 11 || | WQs | Placed AQs | Qs | +Epoch 11 |+==============+=====================+====================+====================+ +Epoch 11 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 11 || | | | 173) | +Epoch 11 |+--------------+---------------------+--------------------+--------------------+ +Epoch 11 | +Epoch 11 |Statistics of the sparsified model: +Epoch 11 |+-----------------------------------------+-------+ +Epoch 11 || Statistic's name | Value | +Epoch 11 |+=========================================+=======+ +Epoch 11 || Sparsity level of the whole model | 0.512 | +Epoch 11 |+-----------------------------------------+-------+ +Epoch 11 || Sparsity level of all sparsified layers | 0.659 | +Epoch 11 |+-----------------------------------------+-------+ +Epoch 11 | +Epoch 11 |Statistics by sparsified layers: +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 11 |+======================+================+================+=====================+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.457 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[0]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[qu | | | | +Epoch 11 || ery]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.466 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[0]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[ke | | | | +Epoch 11 || y]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.465 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[0]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[va | | | | +Epoch 11 || lue]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.481 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[0]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfOutput[ou | | | | +Epoch 11 || tput]/NNCFLinear[den | | | | +Epoch 11 || se]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [3072, 768] | 0.755 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[0]/Be | | | | +Epoch 11 || rtIntermediate[inter | | | | +Epoch 11 || mediate]/NNCFLinear[ | | | | +Epoch 11 || dense]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 3072] | 0.766 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[0]/Be | | | | +Epoch 11 || rtOutput[output]/NNC | | | | +Epoch 11 || FLinear[dense]/linea | | | | +Epoch 11 || r_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.454 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[1]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[qu | | | | +Epoch 11 || ery]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.458 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[1]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[ke | | | | +Epoch 11 || y]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.464 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[1]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[va | | | | +Epoch 11 || lue]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.483 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[1]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfOutput[ou | | | | +Epoch 11 || tput]/NNCFLinear[den | | | | +Epoch 11 || se]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [3072, 768] | 0.756 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[1]/Be | | | | +Epoch 11 || rtIntermediate[inter | | | | +Epoch 11 || mediate]/NNCFLinear[ | | | | +Epoch 11 || dense]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 3072] | 0.769 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[1]/Be | | | | +Epoch 11 || rtOutput[output]/NNC | | | | +Epoch 11 || FLinear[dense]/linea | | | | +Epoch 11 || r_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.471 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[2]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[qu | | | | +Epoch 11 || ery]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.472 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[2]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[ke | | | | +Epoch 11 || y]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.469 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[2]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[va | | | | +Epoch 11 || lue]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.479 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[2]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfOutput[ou | | | | +Epoch 11 || tput]/NNCFLinear[den | | | | +Epoch 11 || se]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [3072, 768] | 0.757 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[2]/Be | | | | +Epoch 11 || rtIntermediate[inter | | | | +Epoch 11 || mediate]/NNCFLinear[ | | | | +Epoch 11 || dense]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 3072] | 0.768 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[2]/Be | | | | +Epoch 11 || rtOutput[output]/NNC | | | | +Epoch 11 || FLinear[dense]/linea | | | | +Epoch 11 || r_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.452 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[3]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[qu | | | | +Epoch 11 || ery]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.453 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[3]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[ke | | | | +Epoch 11 || y]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.465 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[3]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[va | | | | +Epoch 11 || lue]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.473 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[3]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfOutput[ou | | | | +Epoch 11 || tput]/NNCFLinear[den | | | | +Epoch 11 || se]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [3072, 768] | 0.758 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[3]/Be | | | | +Epoch 11 || rtIntermediate[inter | | | | +Epoch 11 || mediate]/NNCFLinear[ | | | | +Epoch 11 || dense]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 3072] | 0.772 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[3]/Be | | | | +Epoch 11 || rtOutput[output]/NNC | | | | +Epoch 11 || FLinear[dense]/linea | | | | +Epoch 11 || r_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.449 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[4]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[qu | | | | +Epoch 11 || ery]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.450 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[4]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[ke | | | | +Epoch 11 || y]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.457 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[4]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[va | | | | +Epoch 11 || lue]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.468 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[4]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfOutput[ou | | | | +Epoch 11 || tput]/NNCFLinear[den | | | | +Epoch 11 || se]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [3072, 768] | 0.758 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[4]/Be | | | | +Epoch 11 || rtIntermediate[inter | | | | +Epoch 11 || mediate]/NNCFLinear[ | | | | +Epoch 11 || dense]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 3072] | 0.774 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[4]/Be | | | | +Epoch 11 || rtOutput[output]/NNC | | | | +Epoch 11 || FLinear[dense]/linea | | | | +Epoch 11 || r_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.448 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[5]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[qu | | | | +Epoch 11 || ery]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.449 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[5]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[ke | | | | +Epoch 11 || y]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.462 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[5]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[va | | | | +Epoch 11 || lue]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.468 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[5]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfOutput[ou | | | | +Epoch 11 || tput]/NNCFLinear[den | | | | +Epoch 11 || se]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [3072, 768] | 0.758 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[5]/Be | | | | +Epoch 11 || rtIntermediate[inter | | | | +Epoch 11 || mediate]/NNCFLinear[ | | | | +Epoch 11 || dense]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 3072] | 0.773 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[5]/Be | | | | +Epoch 11 || rtOutput[output]/NNC | | | | +Epoch 11 || FLinear[dense]/linea | | | | +Epoch 11 || r_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.446 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[6]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[qu | | | | +Epoch 11 || ery]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.447 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[6]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[ke | | | | +Epoch 11 || y]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.462 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[6]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[va | | | | +Epoch 11 || lue]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.468 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[6]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfOutput[ou | | | | +Epoch 11 || tput]/NNCFLinear[den | | | | +Epoch 11 || se]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [3072, 768] | 0.757 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[6]/Be | | | | +Epoch 11 || rtIntermediate[inter | | | | +Epoch 11 || mediate]/NNCFLinear[ | | | | +Epoch 11 || dense]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 3072] | 0.770 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[6]/Be | | | | +Epoch 11 || rtOutput[output]/NNC | | | | +Epoch 11 || FLinear[dense]/linea | | | | +Epoch 11 || r_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.445 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[7]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[qu | | | | +Epoch 11 || ery]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.446 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[7]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[ke | | | | +Epoch 11 || y]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.457 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[7]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[va | | | | +Epoch 11 || lue]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.463 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[7]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfOutput[ou | | | | +Epoch 11 || tput]/NNCFLinear[den | | | | +Epoch 11 || se]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [3072, 768] | 0.755 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[7]/Be | | | | +Epoch 11 || rtIntermediate[inter | | | | +Epoch 11 || mediate]/NNCFLinear[ | | | | +Epoch 11 || dense]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 3072] | 0.765 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[7]/Be | | | | +Epoch 11 || rtOutput[output]/NNC | | | | +Epoch 11 || FLinear[dense]/linea | | | | +Epoch 11 || r_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.445 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[8]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[qu | | | | +Epoch 11 || ery]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.446 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[8]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[ke | | | | +Epoch 11 || y]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.450 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[8]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[va | | | | +Epoch 11 || lue]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.455 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[8]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfOutput[ou | | | | +Epoch 11 || tput]/NNCFLinear[den | | | | +Epoch 11 || se]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [3072, 768] | 0.753 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[8]/Be | | | | +Epoch 11 || rtIntermediate[inter | | | | +Epoch 11 || mediate]/NNCFLinear[ | | | | +Epoch 11 || dense]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 3072] | 0.762 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[8]/Be | | | | +Epoch 11 || rtOutput[output]/NNC | | | | +Epoch 11 || FLinear[dense]/linea | | | | +Epoch 11 || r_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.441 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[9]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[qu | | | | +Epoch 11 || ery]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.444 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[9]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[ke | | | | +Epoch 11 || y]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.448 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[9]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfAttention | | | | +Epoch 11 || [self]/NNCFLinear[va | | | | +Epoch 11 || lue]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.450 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[9]/Be | | | | +Epoch 11 || rtAttention[attentio | | | | +Epoch 11 || n]/BertSelfOutput[ou | | | | +Epoch 11 || tput]/NNCFLinear[den | | | | +Epoch 11 || se]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [3072, 768] | 0.758 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[9]/Be | | | | +Epoch 11 || rtIntermediate[inter | | | | +Epoch 11 || mediate]/NNCFLinear[ | | | | +Epoch 11 || dense]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 3072] | 0.766 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[9]/Be | | | | +Epoch 11 || rtOutput[output]/NNC | | | | +Epoch 11 || FLinear[dense]/linea | | | | +Epoch 11 || r_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.443 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[10]/B | | | | +Epoch 11 || ertAttention[attenti | | | | +Epoch 11 || on]/BertSelfAttentio | | | | +Epoch 11 || n[self]/NNCFLinear[q | | | | +Epoch 11 || uery]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.444 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[10]/B | | | | +Epoch 11 || ertAttention[attenti | | | | +Epoch 11 || on]/BertSelfAttentio | | | | +Epoch 11 || n[self]/NNCFLinear[k | | | | +Epoch 11 || ey]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.460 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[10]/B | | | | +Epoch 11 || ertAttention[attenti | | | | +Epoch 11 || on]/BertSelfAttentio | | | | +Epoch 11 || n[self]/NNCFLinear[v | | | | +Epoch 11 || alue]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.455 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[10]/B | | | | +Epoch 11 || ertAttention[attenti | | | | +Epoch 11 || on]/BertSelfOutput[o | | | | +Epoch 11 || utput]/NNCFLinear[de | | | | +Epoch 11 || nse]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [3072, 768] | 0.752 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[10]/B | | | | +Epoch 11 || ertIntermediate[inte | | | | +Epoch 11 || rmediate]/NNCFLinear | | | | +Epoch 11 || [dense]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 3072] | 0.759 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[10]/B | | | | +Epoch 11 || ertOutput[output]/NN | | | | +Epoch 11 || CFLinear[dense]/line | | | | +Epoch 11 || ar_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.445 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[11]/B | | | | +Epoch 11 || ertAttention[attenti | | | | +Epoch 11 || on]/BertSelfAttentio | | | | +Epoch 11 || n[self]/NNCFLinear[q | | | | +Epoch 11 || uery]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.441 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[11]/B | | | | +Epoch 11 || ertAttention[attenti | | | | +Epoch 11 || on]/BertSelfAttentio | | | | +Epoch 11 || n[self]/NNCFLinear[k | | | | +Epoch 11 || ey]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.449 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[11]/B | | | | +Epoch 11 || ertAttention[attenti | | | | +Epoch 11 || on]/BertSelfAttentio | | | | +Epoch 11 || n[self]/NNCFLinear[v | | | | +Epoch 11 || alue]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 768] | 0.443 | 0.694 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[11]/B | | | | +Epoch 11 || ertAttention[attenti | | | | +Epoch 11 || on]/BertSelfOutput[o | | | | +Epoch 11 || utput]/NNCFLinear[de | | | | +Epoch 11 || nse]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [3072, 768] | 0.750 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[11]/B | | | | +Epoch 11 || ertIntermediate[inte | | | | +Epoch 11 || rmediate]/NNCFLinear | | | | +Epoch 11 || [dense]/linear_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 || BertForSequenceClass | [768, 3072] | 0.754 | 2.778 | +Epoch 11 || ification/BertModel[ | | | | +Epoch 11 || bert]/BertEncoder[en | | | | +Epoch 11 || coder]/ModuleList[la | | | | +Epoch 11 || yer]/BertLayer[11]/B | | | | +Epoch 11 || ertOutput[output]/NN | | | | +Epoch 11 || CFLinear[dense]/line | | | | +Epoch 11 || ar_0 | | | | +Epoch 11 |+----------------------+----------------+----------------+---------------------+ +Epoch 11 | +Epoch 11 |Statistics of the magnitude sparsity algorithm: +Epoch 11 |+----------------------------------------------------------------------+-------+ +Epoch 11 || Statistic's name | Value | +Epoch 11 |+======================================================================+=======+ +Epoch 11 || A target level of the sparsity for the algorithm for the current | 0.660 | +Epoch 11 || epoch | | +Epoch 11 |+----------------------------------------------------------------------+-------+ +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || Layer's name | Sparsity threshold | +Epoch 11 |+=========================================================+====================+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 11 || near_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 11 || inear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 11 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 11 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 11 || near_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 11 || inear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 11 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 11 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 11 || near_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 11 || inear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 11 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 11 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 11 || near_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 11 || inear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 11 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 11 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 11 || near_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 11 || inear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 11 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 11 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 11 || near_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 11 || inear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 11 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 11 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 11 || near_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 11 || inear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 11 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 11 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 11 || near_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 11 || inear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 11 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 11 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 11 || near_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 11 || inear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 11 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 11 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 11 || near_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 11 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 11 || inear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 11 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 11 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 11 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 11 || /linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 11 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 11 || inear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 11 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 11 || /linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 11 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 11 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 11 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 11 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 11 || /linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 11 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 11 || inear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 11 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 11 || /linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 11 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 11 || linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 11 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +Epoch 11 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 11 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 11 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 11 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 12 |+--------------------------------+-------+ +Epoch 12 || Statistic's name | Value | +Epoch 12 |+================================+=======+ +Epoch 12 || Ratio of enabled quantizations | 100 | +Epoch 12 |+--------------------------------+-------+ +Epoch 12 | +Epoch 12 |Statistics of the quantization share: +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Statistic's name | Value | +Epoch 12 |+==================================+====================+ +Epoch 12 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 12 |+----------------------------------+--------------------+ +Epoch 12 | +Epoch 12 |Statistics of the bitwidth distribution: +Epoch 12 |+--------------+---------------------+--------------------+--------------------+ +Epoch 12 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 12 || | WQs | Placed AQs | Qs | +Epoch 12 |+==============+=====================+====================+====================+ +Epoch 12 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 12 || | | | 173) | +Epoch 12 |+--------------+---------------------+--------------------+--------------------+ +Epoch 12 | +Epoch 12 |Statistics of the sparsified model: +Epoch 12 |+-----------------------------------------+-------+ +Epoch 12 || Statistic's name | Value | +Epoch 12 |+=========================================+=======+ +Epoch 12 || Sparsity level of the whole model | 0.533 | +Epoch 12 |+-----------------------------------------+-------+ +Epoch 12 || Sparsity level of all sparsified layers | 0.688 | +Epoch 12 |+-----------------------------------------+-------+ +Epoch 12 | +Epoch 12 |Statistics by sparsified layers: +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 12 |+======================+================+================+=====================+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.481 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[0]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[qu | | | | +Epoch 12 || ery]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.489 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[0]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[ke | | | | +Epoch 12 || y]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.483 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[0]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[va | | | | +Epoch 12 || lue]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.500 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[0]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfOutput[ou | | | | +Epoch 12 || tput]/NNCFLinear[den | | | | +Epoch 12 || se]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [3072, 768] | 0.786 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[0]/Be | | | | +Epoch 12 || rtIntermediate[inter | | | | +Epoch 12 || mediate]/NNCFLinear[ | | | | +Epoch 12 || dense]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 3072] | 0.796 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[0]/Be | | | | +Epoch 12 || rtOutput[output]/NNC | | | | +Epoch 12 || FLinear[dense]/linea | | | | +Epoch 12 || r_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.477 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[1]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[qu | | | | +Epoch 12 || ery]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.481 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[1]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[ke | | | | +Epoch 12 || y]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.483 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[1]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[va | | | | +Epoch 12 || lue]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.502 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[1]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfOutput[ou | | | | +Epoch 12 || tput]/NNCFLinear[den | | | | +Epoch 12 || se]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [3072, 768] | 0.787 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[1]/Be | | | | +Epoch 12 || rtIntermediate[inter | | | | +Epoch 12 || mediate]/NNCFLinear[ | | | | +Epoch 12 || dense]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 3072] | 0.799 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[1]/Be | | | | +Epoch 12 || rtOutput[output]/NNC | | | | +Epoch 12 || FLinear[dense]/linea | | | | +Epoch 12 || r_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.496 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[2]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[qu | | | | +Epoch 12 || ery]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.496 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[2]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[ke | | | | +Epoch 12 || y]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.487 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[2]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[va | | | | +Epoch 12 || lue]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.497 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[2]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfOutput[ou | | | | +Epoch 12 || tput]/NNCFLinear[den | | | | +Epoch 12 || se]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [3072, 768] | 0.788 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[2]/Be | | | | +Epoch 12 || rtIntermediate[inter | | | | +Epoch 12 || mediate]/NNCFLinear[ | | | | +Epoch 12 || dense]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 3072] | 0.798 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[2]/Be | | | | +Epoch 12 || rtOutput[output]/NNC | | | | +Epoch 12 || FLinear[dense]/linea | | | | +Epoch 12 || r_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.475 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[3]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[qu | | | | +Epoch 12 || ery]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.476 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[3]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[ke | | | | +Epoch 12 || y]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.485 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[3]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[va | | | | +Epoch 12 || lue]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.492 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[3]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfOutput[ou | | | | +Epoch 12 || tput]/NNCFLinear[den | | | | +Epoch 12 || se]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [3072, 768] | 0.789 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[3]/Be | | | | +Epoch 12 || rtIntermediate[inter | | | | +Epoch 12 || mediate]/NNCFLinear[ | | | | +Epoch 12 || dense]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 3072] | 0.802 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[3]/Be | | | | +Epoch 12 || rtOutput[output]/NNC | | | | +Epoch 12 || FLinear[dense]/linea | | | | +Epoch 12 || r_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.472 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[4]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[qu | | | | +Epoch 12 || ery]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.472 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[4]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[ke | | | | +Epoch 12 || y]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.479 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[4]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[va | | | | +Epoch 12 || lue]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.489 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[4]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfOutput[ou | | | | +Epoch 12 || tput]/NNCFLinear[den | | | | +Epoch 12 || se]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [3072, 768] | 0.789 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[4]/Be | | | | +Epoch 12 || rtIntermediate[inter | | | | +Epoch 12 || mediate]/NNCFLinear[ | | | | +Epoch 12 || dense]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 3072] | 0.803 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[4]/Be | | | | +Epoch 12 || rtOutput[output]/NNC | | | | +Epoch 12 || FLinear[dense]/linea | | | | +Epoch 12 || r_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.471 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[5]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[qu | | | | +Epoch 12 || ery]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.472 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[5]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[ke | | | | +Epoch 12 || y]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.483 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[5]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[va | | | | +Epoch 12 || lue]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.488 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[5]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfOutput[ou | | | | +Epoch 12 || tput]/NNCFLinear[den | | | | +Epoch 12 || se]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [3072, 768] | 0.789 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[5]/Be | | | | +Epoch 12 || rtIntermediate[inter | | | | +Epoch 12 || mediate]/NNCFLinear[ | | | | +Epoch 12 || dense]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 3072] | 0.803 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[5]/Be | | | | +Epoch 12 || rtOutput[output]/NNC | | | | +Epoch 12 || FLinear[dense]/linea | | | | +Epoch 12 || r_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.469 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[6]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[qu | | | | +Epoch 12 || ery]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.470 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[6]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[ke | | | | +Epoch 12 || y]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.483 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[6]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[va | | | | +Epoch 12 || lue]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.489 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[6]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfOutput[ou | | | | +Epoch 12 || tput]/NNCFLinear[den | | | | +Epoch 12 || se]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [3072, 768] | 0.788 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[6]/Be | | | | +Epoch 12 || rtIntermediate[inter | | | | +Epoch 12 || mediate]/NNCFLinear[ | | | | +Epoch 12 || dense]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 3072] | 0.800 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[6]/Be | | | | +Epoch 12 || rtOutput[output]/NNC | | | | +Epoch 12 || FLinear[dense]/linea | | | | +Epoch 12 || r_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.469 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[7]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[qu | | | | +Epoch 12 || ery]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.470 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[7]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[ke | | | | +Epoch 12 || y]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.478 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[7]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[va | | | | +Epoch 12 || lue]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.484 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[7]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfOutput[ou | | | | +Epoch 12 || tput]/NNCFLinear[den | | | | +Epoch 12 || se]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [3072, 768] | 0.786 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[7]/Be | | | | +Epoch 12 || rtIntermediate[inter | | | | +Epoch 12 || mediate]/NNCFLinear[ | | | | +Epoch 12 || dense]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 3072] | 0.796 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[7]/Be | | | | +Epoch 12 || rtOutput[output]/NNC | | | | +Epoch 12 || FLinear[dense]/linea | | | | +Epoch 12 || r_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.469 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[8]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[qu | | | | +Epoch 12 || ery]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.470 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[8]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[ke | | | | +Epoch 12 || y]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.473 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[8]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[va | | | | +Epoch 12 || lue]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.477 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[8]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfOutput[ou | | | | +Epoch 12 || tput]/NNCFLinear[den | | | | +Epoch 12 || se]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [3072, 768] | 0.785 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[8]/Be | | | | +Epoch 12 || rtIntermediate[inter | | | | +Epoch 12 || mediate]/NNCFLinear[ | | | | +Epoch 12 || dense]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 3072] | 0.793 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[8]/Be | | | | +Epoch 12 || rtOutput[output]/NNC | | | | +Epoch 12 || FLinear[dense]/linea | | | | +Epoch 12 || r_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.466 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[9]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[qu | | | | +Epoch 12 || ery]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.468 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[9]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[ke | | | | +Epoch 12 || y]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.471 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[9]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfAttention | | | | +Epoch 12 || [self]/NNCFLinear[va | | | | +Epoch 12 || lue]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.472 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[9]/Be | | | | +Epoch 12 || rtAttention[attentio | | | | +Epoch 12 || n]/BertSelfOutput[ou | | | | +Epoch 12 || tput]/NNCFLinear[den | | | | +Epoch 12 || se]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [3072, 768] | 0.789 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[9]/Be | | | | +Epoch 12 || rtIntermediate[inter | | | | +Epoch 12 || mediate]/NNCFLinear[ | | | | +Epoch 12 || dense]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 3072] | 0.797 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[9]/Be | | | | +Epoch 12 || rtOutput[output]/NNC | | | | +Epoch 12 || FLinear[dense]/linea | | | | +Epoch 12 || r_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.468 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[10]/B | | | | +Epoch 12 || ertAttention[attenti | | | | +Epoch 12 || on]/BertSelfAttentio | | | | +Epoch 12 || n[self]/NNCFLinear[q | | | | +Epoch 12 || uery]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.469 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[10]/B | | | | +Epoch 12 || ertAttention[attenti | | | | +Epoch 12 || on]/BertSelfAttentio | | | | +Epoch 12 || n[self]/NNCFLinear[k | | | | +Epoch 12 || ey]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.484 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[10]/B | | | | +Epoch 12 || ertAttention[attenti | | | | +Epoch 12 || on]/BertSelfAttentio | | | | +Epoch 12 || n[self]/NNCFLinear[v | | | | +Epoch 12 || alue]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.478 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[10]/B | | | | +Epoch 12 || ertAttention[attenti | | | | +Epoch 12 || on]/BertSelfOutput[o | | | | +Epoch 12 || utput]/NNCFLinear[de | | | | +Epoch 12 || nse]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [3072, 768] | 0.784 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[10]/B | | | | +Epoch 12 || ertIntermediate[inte | | | | +Epoch 12 || rmediate]/NNCFLinear | | | | +Epoch 12 || [dense]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 3072] | 0.791 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[10]/B | | | | +Epoch 12 || ertOutput[output]/NN | | | | +Epoch 12 || CFLinear[dense]/line | | | | +Epoch 12 || ar_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.471 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[11]/B | | | | +Epoch 12 || ertAttention[attenti | | | | +Epoch 12 || on]/BertSelfAttentio | | | | +Epoch 12 || n[self]/NNCFLinear[q | | | | +Epoch 12 || uery]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.466 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[11]/B | | | | +Epoch 12 || ertAttention[attenti | | | | +Epoch 12 || on]/BertSelfAttentio | | | | +Epoch 12 || n[self]/NNCFLinear[k | | | | +Epoch 12 || ey]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.473 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[11]/B | | | | +Epoch 12 || ertAttention[attenti | | | | +Epoch 12 || on]/BertSelfAttentio | | | | +Epoch 12 || n[self]/NNCFLinear[v | | | | +Epoch 12 || alue]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 768] | 0.468 | 0.694 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[11]/B | | | | +Epoch 12 || ertAttention[attenti | | | | +Epoch 12 || on]/BertSelfOutput[o | | | | +Epoch 12 || utput]/NNCFLinear[de | | | | +Epoch 12 || nse]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [3072, 768] | 0.783 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[11]/B | | | | +Epoch 12 || ertIntermediate[inte | | | | +Epoch 12 || rmediate]/NNCFLinear | | | | +Epoch 12 || [dense]/linear_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 || BertForSequenceClass | [768, 3072] | 0.786 | 2.778 | +Epoch 12 || ification/BertModel[ | | | | +Epoch 12 || bert]/BertEncoder[en | | | | +Epoch 12 || coder]/ModuleList[la | | | | +Epoch 12 || yer]/BertLayer[11]/B | | | | +Epoch 12 || ertOutput[output]/NN | | | | +Epoch 12 || CFLinear[dense]/line | | | | +Epoch 12 || ar_0 | | | | +Epoch 12 |+----------------------+----------------+----------------+---------------------+ +Epoch 12 | +Epoch 12 |Statistics of the magnitude sparsity algorithm: +Epoch 12 |+----------------------------------------------------------------------+-------+ +Epoch 12 || Statistic's name | Value | +Epoch 12 |+======================================================================+=======+ +Epoch 12 || A target level of the sparsity for the algorithm for the current | 0.688 | +Epoch 12 || epoch | | +Epoch 12 |+----------------------------------------------------------------------+-------+ +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || Layer's name | Sparsity threshold | +Epoch 12 |+=========================================================+====================+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 12 || near_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 12 || inear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 12 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 12 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 12 || near_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 12 || inear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 12 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 12 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 12 || near_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 12 || inear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 12 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 12 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 12 || near_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 12 || inear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 12 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 12 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 12 || near_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 12 || inear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 12 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 12 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 12 || near_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 12 || inear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 12 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 12 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 12 || near_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 12 || inear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 12 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 12 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 12 || near_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 12 || inear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 12 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 12 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 12 || near_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 12 || inear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 12 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 12 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 12 || near_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 12 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 12 || inear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 12 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 12 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 12 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 12 || /linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 12 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 12 || inear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 12 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 12 || /linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 12 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 12 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 12 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 12 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 12 || /linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 12 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 12 || inear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 12 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 12 || /linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 12 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 12 || linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 12 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +Epoch 12 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 12 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 12 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 12 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 13 |+--------------------------------+-------+ +Epoch 13 || Statistic's name | Value | +Epoch 13 |+================================+=======+ +Epoch 13 || Ratio of enabled quantizations | 100 | +Epoch 13 |+--------------------------------+-------+ +Epoch 13 | +Epoch 13 |Statistics of the quantization share: +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Statistic's name | Value | +Epoch 13 |+==================================+====================+ +Epoch 13 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 13 |+----------------------------------+--------------------+ +Epoch 13 | +Epoch 13 |Statistics of the bitwidth distribution: +Epoch 13 |+--------------+---------------------+--------------------+--------------------+ +Epoch 13 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 13 || | WQs | Placed AQs | Qs | +Epoch 13 |+==============+=====================+====================+====================+ +Epoch 13 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 13 || | | | 173) | +Epoch 13 |+--------------+---------------------+--------------------+--------------------+ +Epoch 13 | +Epoch 13 |Statistics of the sparsified model: +Epoch 13 |+-----------------------------------------+-------+ +Epoch 13 || Statistic's name | Value | +Epoch 13 |+=========================================+=======+ +Epoch 13 || Sparsity level of the whole model | 0.552 | +Epoch 13 |+-----------------------------------------+-------+ +Epoch 13 || Sparsity level of all sparsified layers | 0.712 | +Epoch 13 |+-----------------------------------------+-------+ +Epoch 13 | +Epoch 13 |Statistics by sparsified layers: +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 13 |+======================+================+================+=====================+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.504 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[0]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[qu | | | | +Epoch 13 || ery]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.512 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[0]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[ke | | | | +Epoch 13 || y]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.501 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[0]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[va | | | | +Epoch 13 || lue]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.518 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[0]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfOutput[ou | | | | +Epoch 13 || tput]/NNCFLinear[den | | | | +Epoch 13 || se]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [3072, 768] | 0.811 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[0]/Be | | | | +Epoch 13 || rtIntermediate[inter | | | | +Epoch 13 || mediate]/NNCFLinear[ | | | | +Epoch 13 || dense]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 3072] | 0.821 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[0]/Be | | | | +Epoch 13 || rtOutput[output]/NNC | | | | +Epoch 13 || FLinear[dense]/linea | | | | +Epoch 13 || r_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.500 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[1]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[qu | | | | +Epoch 13 || ery]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.503 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[1]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[ke | | | | +Epoch 13 || y]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.501 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[1]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[va | | | | +Epoch 13 || lue]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.521 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[1]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfOutput[ou | | | | +Epoch 13 || tput]/NNCFLinear[den | | | | +Epoch 13 || se]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [3072, 768] | 0.813 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[1]/Be | | | | +Epoch 13 || rtIntermediate[inter | | | | +Epoch 13 || mediate]/NNCFLinear[ | | | | +Epoch 13 || dense]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 3072] | 0.823 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[1]/Be | | | | +Epoch 13 || rtOutput[output]/NNC | | | | +Epoch 13 || FLinear[dense]/linea | | | | +Epoch 13 || r_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.520 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[2]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[qu | | | | +Epoch 13 || ery]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.520 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[2]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[ke | | | | +Epoch 13 || y]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.504 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[2]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[va | | | | +Epoch 13 || lue]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.515 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[2]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfOutput[ou | | | | +Epoch 13 || tput]/NNCFLinear[den | | | | +Epoch 13 || se]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [3072, 768] | 0.814 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[2]/Be | | | | +Epoch 13 || rtIntermediate[inter | | | | +Epoch 13 || mediate]/NNCFLinear[ | | | | +Epoch 13 || dense]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 3072] | 0.822 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[2]/Be | | | | +Epoch 13 || rtOutput[output]/NNC | | | | +Epoch 13 || FLinear[dense]/linea | | | | +Epoch 13 || r_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.498 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[3]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[qu | | | | +Epoch 13 || ery]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.499 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[3]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[ke | | | | +Epoch 13 || y]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.505 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[3]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[va | | | | +Epoch 13 || lue]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.511 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[3]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfOutput[ou | | | | +Epoch 13 || tput]/NNCFLinear[den | | | | +Epoch 13 || se]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [3072, 768] | 0.814 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[3]/Be | | | | +Epoch 13 || rtIntermediate[inter | | | | +Epoch 13 || mediate]/NNCFLinear[ | | | | +Epoch 13 || dense]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 3072] | 0.826 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[3]/Be | | | | +Epoch 13 || rtOutput[output]/NNC | | | | +Epoch 13 || FLinear[dense]/linea | | | | +Epoch 13 || r_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.494 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[4]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[qu | | | | +Epoch 13 || ery]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.494 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[4]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[ke | | | | +Epoch 13 || y]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.500 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[4]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[va | | | | +Epoch 13 || lue]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.509 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[4]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfOutput[ou | | | | +Epoch 13 || tput]/NNCFLinear[den | | | | +Epoch 13 || se]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [3072, 768] | 0.814 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[4]/Be | | | | +Epoch 13 || rtIntermediate[inter | | | | +Epoch 13 || mediate]/NNCFLinear[ | | | | +Epoch 13 || dense]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 3072] | 0.827 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[4]/Be | | | | +Epoch 13 || rtOutput[output]/NNC | | | | +Epoch 13 || FLinear[dense]/linea | | | | +Epoch 13 || r_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.493 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[5]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[qu | | | | +Epoch 13 || ery]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.495 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[5]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[ke | | | | +Epoch 13 || y]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.505 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[5]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[va | | | | +Epoch 13 || lue]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.509 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[5]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfOutput[ou | | | | +Epoch 13 || tput]/NNCFLinear[den | | | | +Epoch 13 || se]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [3072, 768] | 0.814 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[5]/Be | | | | +Epoch 13 || rtIntermediate[inter | | | | +Epoch 13 || mediate]/NNCFLinear[ | | | | +Epoch 13 || dense]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 3072] | 0.827 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[5]/Be | | | | +Epoch 13 || rtOutput[output]/NNC | | | | +Epoch 13 || FLinear[dense]/linea | | | | +Epoch 13 || r_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.491 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[6]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[qu | | | | +Epoch 13 || ery]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.493 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[6]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[ke | | | | +Epoch 13 || y]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.504 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[6]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[va | | | | +Epoch 13 || lue]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.509 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[6]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfOutput[ou | | | | +Epoch 13 || tput]/NNCFLinear[den | | | | +Epoch 13 || se]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [3072, 768] | 0.813 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[6]/Be | | | | +Epoch 13 || rtIntermediate[inter | | | | +Epoch 13 || mediate]/NNCFLinear[ | | | | +Epoch 13 || dense]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 3072] | 0.824 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[6]/Be | | | | +Epoch 13 || rtOutput[output]/NNC | | | | +Epoch 13 || FLinear[dense]/linea | | | | +Epoch 13 || r_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.492 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[7]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[qu | | | | +Epoch 13 || ery]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.493 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[7]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[ke | | | | +Epoch 13 || y]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.499 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[7]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[va | | | | +Epoch 13 || lue]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.504 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[7]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfOutput[ou | | | | +Epoch 13 || tput]/NNCFLinear[den | | | | +Epoch 13 || se]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [3072, 768] | 0.812 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[7]/Be | | | | +Epoch 13 || rtIntermediate[inter | | | | +Epoch 13 || mediate]/NNCFLinear[ | | | | +Epoch 13 || dense]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 3072] | 0.820 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[7]/Be | | | | +Epoch 13 || rtOutput[output]/NNC | | | | +Epoch 13 || FLinear[dense]/linea | | | | +Epoch 13 || r_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.492 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[8]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[qu | | | | +Epoch 13 || ery]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.493 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[8]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[ke | | | | +Epoch 13 || y]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.494 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[8]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[va | | | | +Epoch 13 || lue]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.499 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[8]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfOutput[ou | | | | +Epoch 13 || tput]/NNCFLinear[den | | | | +Epoch 13 || se]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [3072, 768] | 0.810 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[8]/Be | | | | +Epoch 13 || rtIntermediate[inter | | | | +Epoch 13 || mediate]/NNCFLinear[ | | | | +Epoch 13 || dense]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 3072] | 0.818 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[8]/Be | | | | +Epoch 13 || rtOutput[output]/NNC | | | | +Epoch 13 || FLinear[dense]/linea | | | | +Epoch 13 || r_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.489 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[9]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[qu | | | | +Epoch 13 || ery]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.491 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[9]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[ke | | | | +Epoch 13 || y]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.493 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[9]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfAttention | | | | +Epoch 13 || [self]/NNCFLinear[va | | | | +Epoch 13 || lue]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.494 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[9]/Be | | | | +Epoch 13 || rtAttention[attentio | | | | +Epoch 13 || n]/BertSelfOutput[ou | | | | +Epoch 13 || tput]/NNCFLinear[den | | | | +Epoch 13 || se]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [3072, 768] | 0.815 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[9]/Be | | | | +Epoch 13 || rtIntermediate[inter | | | | +Epoch 13 || mediate]/NNCFLinear[ | | | | +Epoch 13 || dense]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 3072] | 0.822 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[9]/Be | | | | +Epoch 13 || rtOutput[output]/NNC | | | | +Epoch 13 || FLinear[dense]/linea | | | | +Epoch 13 || r_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.491 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[10]/B | | | | +Epoch 13 || ertAttention[attenti | | | | +Epoch 13 || on]/BertSelfAttentio | | | | +Epoch 13 || n[self]/NNCFLinear[q | | | | +Epoch 13 || uery]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.492 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[10]/B | | | | +Epoch 13 || ertAttention[attenti | | | | +Epoch 13 || on]/BertSelfAttentio | | | | +Epoch 13 || n[self]/NNCFLinear[k | | | | +Epoch 13 || ey]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.506 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[10]/B | | | | +Epoch 13 || ertAttention[attenti | | | | +Epoch 13 || on]/BertSelfAttentio | | | | +Epoch 13 || n[self]/NNCFLinear[v | | | | +Epoch 13 || alue]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.500 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[10]/B | | | | +Epoch 13 || ertAttention[attenti | | | | +Epoch 13 || on]/BertSelfOutput[o | | | | +Epoch 13 || utput]/NNCFLinear[de | | | | +Epoch 13 || nse]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [3072, 768] | 0.811 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[10]/B | | | | +Epoch 13 || ertIntermediate[inte | | | | +Epoch 13 || rmediate]/NNCFLinear | | | | +Epoch 13 || [dense]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 3072] | 0.817 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[10]/B | | | | +Epoch 13 || ertOutput[output]/NN | | | | +Epoch 13 || CFLinear[dense]/line | | | | +Epoch 13 || ar_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.495 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[11]/B | | | | +Epoch 13 || ertAttention[attenti | | | | +Epoch 13 || on]/BertSelfAttentio | | | | +Epoch 13 || n[self]/NNCFLinear[q | | | | +Epoch 13 || uery]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.490 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[11]/B | | | | +Epoch 13 || ertAttention[attenti | | | | +Epoch 13 || on]/BertSelfAttentio | | | | +Epoch 13 || n[self]/NNCFLinear[k | | | | +Epoch 13 || ey]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.496 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[11]/B | | | | +Epoch 13 || ertAttention[attenti | | | | +Epoch 13 || on]/BertSelfAttentio | | | | +Epoch 13 || n[self]/NNCFLinear[v | | | | +Epoch 13 || alue]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 768] | 0.491 | 0.694 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[11]/B | | | | +Epoch 13 || ertAttention[attenti | | | | +Epoch 13 || on]/BertSelfOutput[o | | | | +Epoch 13 || utput]/NNCFLinear[de | | | | +Epoch 13 || nse]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [3072, 768] | 0.809 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[11]/B | | | | +Epoch 13 || ertIntermediate[inte | | | | +Epoch 13 || rmediate]/NNCFLinear | | | | +Epoch 13 || [dense]/linear_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 || BertForSequenceClass | [768, 3072] | 0.812 | 2.778 | +Epoch 13 || ification/BertModel[ | | | | +Epoch 13 || bert]/BertEncoder[en | | | | +Epoch 13 || coder]/ModuleList[la | | | | +Epoch 13 || yer]/BertLayer[11]/B | | | | +Epoch 13 || ertOutput[output]/NN | | | | +Epoch 13 || CFLinear[dense]/line | | | | +Epoch 13 || ar_0 | | | | +Epoch 13 |+----------------------+----------------+----------------+---------------------+ +Epoch 13 | +Epoch 13 |Statistics of the magnitude sparsity algorithm: +Epoch 13 |+----------------------------------------------------------------------+-------+ +Epoch 13 || Statistic's name | Value | +Epoch 13 |+======================================================================+=======+ +Epoch 13 || A target level of the sparsity for the algorithm for the current | 0.712 | +Epoch 13 || epoch | | +Epoch 13 |+----------------------------------------------------------------------+-------+ +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || Layer's name | Sparsity threshold | +Epoch 13 |+=========================================================+====================+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 13 || near_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 13 || inear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 13 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 13 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 13 || near_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 13 || inear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 13 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 13 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 13 || near_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 13 || inear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 13 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 13 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 13 || near_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 13 || inear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 13 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 13 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 13 || near_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 13 || inear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 13 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 13 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 13 || near_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 13 || inear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 13 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 13 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 13 || near_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 13 || inear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 13 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 13 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 13 || near_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 13 || inear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 13 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 13 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 13 || near_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 13 || inear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 13 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 13 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 13 || near_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 13 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 13 || inear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 13 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 13 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 13 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 13 || /linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 13 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 13 || inear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 13 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 13 || /linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 13 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 13 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 13 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 13 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 13 || /linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 13 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 13 || inear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 13 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 13 || /linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 13 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 13 || linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 13 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +Epoch 13 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 13 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 13 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 13 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 14 |+--------------------------------+-------+ +Epoch 14 || Statistic's name | Value | +Epoch 14 |+================================+=======+ +Epoch 14 || Ratio of enabled quantizations | 100 | +Epoch 14 |+--------------------------------+-------+ +Epoch 14 | +Epoch 14 |Statistics of the quantization share: +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Statistic's name | Value | +Epoch 14 |+==================================+====================+ +Epoch 14 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 14 |+----------------------------------+--------------------+ +Epoch 14 | +Epoch 14 |Statistics of the bitwidth distribution: +Epoch 14 |+--------------+---------------------+--------------------+--------------------+ +Epoch 14 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 14 || | WQs | Placed AQs | Qs | +Epoch 14 |+==============+=====================+====================+====================+ +Epoch 14 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 14 || | | | 173) | +Epoch 14 |+--------------+---------------------+--------------------+--------------------+ +Epoch 14 | +Epoch 14 |Statistics of the sparsified model: +Epoch 14 |+-----------------------------------------+-------+ +Epoch 14 || Statistic's name | Value | +Epoch 14 |+=========================================+=======+ +Epoch 14 || Sparsity level of the whole model | 0.568 | +Epoch 14 |+-----------------------------------------+-------+ +Epoch 14 || Sparsity level of all sparsified layers | 0.732 | +Epoch 14 |+-----------------------------------------+-------+ +Epoch 14 | +Epoch 14 |Statistics by sparsified layers: +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 14 |+======================+================+================+=====================+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.525 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[0]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[qu | | | | +Epoch 14 || ery]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.533 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[0]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[ke | | | | +Epoch 14 || y]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.521 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[0]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[va | | | | +Epoch 14 || lue]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.538 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[0]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfOutput[ou | | | | +Epoch 14 || tput]/NNCFLinear[den | | | | +Epoch 14 || se]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [3072, 768] | 0.832 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[0]/Be | | | | +Epoch 14 || rtIntermediate[inter | | | | +Epoch 14 || mediate]/NNCFLinear[ | | | | +Epoch 14 || dense]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 3072] | 0.841 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[0]/Be | | | | +Epoch 14 || rtOutput[output]/NNC | | | | +Epoch 14 || FLinear[dense]/linea | | | | +Epoch 14 || r_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.520 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[1]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[qu | | | | +Epoch 14 || ery]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.524 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[1]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[ke | | | | +Epoch 14 || y]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.520 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[1]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[va | | | | +Epoch 14 || lue]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.540 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[1]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfOutput[ou | | | | +Epoch 14 || tput]/NNCFLinear[den | | | | +Epoch 14 || se]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [3072, 768] | 0.833 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[1]/Be | | | | +Epoch 14 || rtIntermediate[inter | | | | +Epoch 14 || mediate]/NNCFLinear[ | | | | +Epoch 14 || dense]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 3072] | 0.843 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[1]/Be | | | | +Epoch 14 || rtOutput[output]/NNC | | | | +Epoch 14 || FLinear[dense]/linea | | | | +Epoch 14 || r_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.541 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[2]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[qu | | | | +Epoch 14 || ery]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.541 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[2]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[ke | | | | +Epoch 14 || y]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.524 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[2]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[va | | | | +Epoch 14 || lue]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.534 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[2]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfOutput[ou | | | | +Epoch 14 || tput]/NNCFLinear[den | | | | +Epoch 14 || se]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [3072, 768] | 0.834 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[2]/Be | | | | +Epoch 14 || rtIntermediate[inter | | | | +Epoch 14 || mediate]/NNCFLinear[ | | | | +Epoch 14 || dense]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 3072] | 0.842 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[2]/Be | | | | +Epoch 14 || rtOutput[output]/NNC | | | | +Epoch 14 || FLinear[dense]/linea | | | | +Epoch 14 || r_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.518 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[3]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[qu | | | | +Epoch 14 || ery]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.520 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[3]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[ke | | | | +Epoch 14 || y]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.525 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[3]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[va | | | | +Epoch 14 || lue]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.531 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[3]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfOutput[ou | | | | +Epoch 14 || tput]/NNCFLinear[den | | | | +Epoch 14 || se]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [3072, 768] | 0.834 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[3]/Be | | | | +Epoch 14 || rtIntermediate[inter | | | | +Epoch 14 || mediate]/NNCFLinear[ | | | | +Epoch 14 || dense]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 3072] | 0.846 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[3]/Be | | | | +Epoch 14 || rtOutput[output]/NNC | | | | +Epoch 14 || FLinear[dense]/linea | | | | +Epoch 14 || r_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.515 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[4]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[qu | | | | +Epoch 14 || ery]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.515 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[4]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[ke | | | | +Epoch 14 || y]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.521 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[4]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[va | | | | +Epoch 14 || lue]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.530 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[4]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfOutput[ou | | | | +Epoch 14 || tput]/NNCFLinear[den | | | | +Epoch 14 || se]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [3072, 768] | 0.835 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[4]/Be | | | | +Epoch 14 || rtIntermediate[inter | | | | +Epoch 14 || mediate]/NNCFLinear[ | | | | +Epoch 14 || dense]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 3072] | 0.846 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[4]/Be | | | | +Epoch 14 || rtOutput[output]/NNC | | | | +Epoch 14 || FLinear[dense]/linea | | | | +Epoch 14 || r_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.514 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[5]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[qu | | | | +Epoch 14 || ery]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.516 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[5]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[ke | | | | +Epoch 14 || y]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.525 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[5]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[va | | | | +Epoch 14 || lue]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.530 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[5]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfOutput[ou | | | | +Epoch 14 || tput]/NNCFLinear[den | | | | +Epoch 14 || se]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [3072, 768] | 0.835 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[5]/Be | | | | +Epoch 14 || rtIntermediate[inter | | | | +Epoch 14 || mediate]/NNCFLinear[ | | | | +Epoch 14 || dense]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 3072] | 0.846 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[5]/Be | | | | +Epoch 14 || rtOutput[output]/NNC | | | | +Epoch 14 || FLinear[dense]/linea | | | | +Epoch 14 || r_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.512 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[6]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[qu | | | | +Epoch 14 || ery]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.514 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[6]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[ke | | | | +Epoch 14 || y]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.524 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[6]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[va | | | | +Epoch 14 || lue]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.530 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[6]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfOutput[ou | | | | +Epoch 14 || tput]/NNCFLinear[den | | | | +Epoch 14 || se]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [3072, 768] | 0.834 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[6]/Be | | | | +Epoch 14 || rtIntermediate[inter | | | | +Epoch 14 || mediate]/NNCFLinear[ | | | | +Epoch 14 || dense]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 3072] | 0.843 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[6]/Be | | | | +Epoch 14 || rtOutput[output]/NNC | | | | +Epoch 14 || FLinear[dense]/linea | | | | +Epoch 14 || r_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.513 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[7]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[qu | | | | +Epoch 14 || ery]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.513 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[7]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[ke | | | | +Epoch 14 || y]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.519 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[7]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[va | | | | +Epoch 14 || lue]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.524 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[7]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfOutput[ou | | | | +Epoch 14 || tput]/NNCFLinear[den | | | | +Epoch 14 || se]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [3072, 768] | 0.832 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[7]/Be | | | | +Epoch 14 || rtIntermediate[inter | | | | +Epoch 14 || mediate]/NNCFLinear[ | | | | +Epoch 14 || dense]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 3072] | 0.840 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[7]/Be | | | | +Epoch 14 || rtOutput[output]/NNC | | | | +Epoch 14 || FLinear[dense]/linea | | | | +Epoch 14 || r_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.513 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[8]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[qu | | | | +Epoch 14 || ery]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.513 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[8]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[ke | | | | +Epoch 14 || y]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.515 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[8]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[va | | | | +Epoch 14 || lue]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.520 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[8]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfOutput[ou | | | | +Epoch 14 || tput]/NNCFLinear[den | | | | +Epoch 14 || se]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [3072, 768] | 0.831 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[8]/Be | | | | +Epoch 14 || rtIntermediate[inter | | | | +Epoch 14 || mediate]/NNCFLinear[ | | | | +Epoch 14 || dense]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 3072] | 0.839 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[8]/Be | | | | +Epoch 14 || rtOutput[output]/NNC | | | | +Epoch 14 || FLinear[dense]/linea | | | | +Epoch 14 || r_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.510 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[9]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[qu | | | | +Epoch 14 || ery]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.512 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[9]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[ke | | | | +Epoch 14 || y]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.514 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[9]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfAttention | | | | +Epoch 14 || [self]/NNCFLinear[va | | | | +Epoch 14 || lue]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.514 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[9]/Be | | | | +Epoch 14 || rtAttention[attentio | | | | +Epoch 14 || n]/BertSelfOutput[ou | | | | +Epoch 14 || tput]/NNCFLinear[den | | | | +Epoch 14 || se]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [3072, 768] | 0.836 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[9]/Be | | | | +Epoch 14 || rtIntermediate[inter | | | | +Epoch 14 || mediate]/NNCFLinear[ | | | | +Epoch 14 || dense]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 3072] | 0.842 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[9]/Be | | | | +Epoch 14 || rtOutput[output]/NNC | | | | +Epoch 14 || FLinear[dense]/linea | | | | +Epoch 14 || r_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.512 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[10]/B | | | | +Epoch 14 || ertAttention[attenti | | | | +Epoch 14 || on]/BertSelfAttentio | | | | +Epoch 14 || n[self]/NNCFLinear[q | | | | +Epoch 14 || uery]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.512 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[10]/B | | | | +Epoch 14 || ertAttention[attenti | | | | +Epoch 14 || on]/BertSelfAttentio | | | | +Epoch 14 || n[self]/NNCFLinear[k | | | | +Epoch 14 || ey]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.527 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[10]/B | | | | +Epoch 14 || ertAttention[attenti | | | | +Epoch 14 || on]/BertSelfAttentio | | | | +Epoch 14 || n[self]/NNCFLinear[v | | | | +Epoch 14 || alue]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.521 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[10]/B | | | | +Epoch 14 || ertAttention[attenti | | | | +Epoch 14 || on]/BertSelfOutput[o | | | | +Epoch 14 || utput]/NNCFLinear[de | | | | +Epoch 14 || nse]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [3072, 768] | 0.832 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[10]/B | | | | +Epoch 14 || ertIntermediate[inte | | | | +Epoch 14 || rmediate]/NNCFLinear | | | | +Epoch 14 || [dense]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 3072] | 0.837 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[10]/B | | | | +Epoch 14 || ertOutput[output]/NN | | | | +Epoch 14 || CFLinear[dense]/line | | | | +Epoch 14 || ar_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.515 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[11]/B | | | | +Epoch 14 || ertAttention[attenti | | | | +Epoch 14 || on]/BertSelfAttentio | | | | +Epoch 14 || n[self]/NNCFLinear[q | | | | +Epoch 14 || uery]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.510 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[11]/B | | | | +Epoch 14 || ertAttention[attenti | | | | +Epoch 14 || on]/BertSelfAttentio | | | | +Epoch 14 || n[self]/NNCFLinear[k | | | | +Epoch 14 || ey]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.517 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[11]/B | | | | +Epoch 14 || ertAttention[attenti | | | | +Epoch 14 || on]/BertSelfAttentio | | | | +Epoch 14 || n[self]/NNCFLinear[v | | | | +Epoch 14 || alue]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 768] | 0.512 | 0.694 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[11]/B | | | | +Epoch 14 || ertAttention[attenti | | | | +Epoch 14 || on]/BertSelfOutput[o | | | | +Epoch 14 || utput]/NNCFLinear[de | | | | +Epoch 14 || nse]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [3072, 768] | 0.830 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[11]/B | | | | +Epoch 14 || ertIntermediate[inte | | | | +Epoch 14 || rmediate]/NNCFLinear | | | | +Epoch 14 || [dense]/linear_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 || BertForSequenceClass | [768, 3072] | 0.833 | 2.778 | +Epoch 14 || ification/BertModel[ | | | | +Epoch 14 || bert]/BertEncoder[en | | | | +Epoch 14 || coder]/ModuleList[la | | | | +Epoch 14 || yer]/BertLayer[11]/B | | | | +Epoch 14 || ertOutput[output]/NN | | | | +Epoch 14 || CFLinear[dense]/line | | | | +Epoch 14 || ar_0 | | | | +Epoch 14 |+----------------------+----------------+----------------+---------------------+ +Epoch 14 | +Epoch 14 |Statistics of the magnitude sparsity algorithm: +Epoch 14 |+----------------------------------------------------------------------+-------+ +Epoch 14 || Statistic's name | Value | +Epoch 14 |+======================================================================+=======+ +Epoch 14 || A target level of the sparsity for the algorithm for the current | 0.732 | +Epoch 14 || epoch | | +Epoch 14 |+----------------------------------------------------------------------+-------+ +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || Layer's name | Sparsity threshold | +Epoch 14 |+=========================================================+====================+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 14 || near_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 14 || inear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 14 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 14 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 14 || near_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 14 || inear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 14 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 14 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 14 || near_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 14 || inear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 14 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 14 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 14 || near_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 14 || inear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 14 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 14 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 14 || near_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 14 || inear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 14 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 14 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 14 || near_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 14 || inear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 14 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 14 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 14 || near_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 14 || inear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 14 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 14 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 14 || near_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 14 || inear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 14 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 14 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 14 || near_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 14 || inear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 14 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 14 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 14 || near_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 14 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 14 || inear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 14 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 14 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 14 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 14 || /linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 14 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 14 || inear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 14 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 14 || /linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 14 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 14 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 14 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 14 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 14 || /linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 14 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 14 || inear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 14 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 14 || /linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 14 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 14 || linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 14 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +Epoch 14 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 14 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 14 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 14 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 15 |+--------------------------------+-------+ +Epoch 15 || Statistic's name | Value | +Epoch 15 |+================================+=======+ +Epoch 15 || Ratio of enabled quantizations | 100 | +Epoch 15 |+--------------------------------+-------+ +Epoch 15 | +Epoch 15 |Statistics of the quantization share: +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Statistic's name | Value | +Epoch 15 |+==================================+====================+ +Epoch 15 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 15 |+----------------------------------+--------------------+ +Epoch 15 | +Epoch 15 |Statistics of the bitwidth distribution: +Epoch 15 |+--------------+---------------------+--------------------+--------------------+ +Epoch 15 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 15 || | WQs | Placed AQs | Qs | +Epoch 15 |+==============+=====================+====================+====================+ +Epoch 15 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 15 || | | | 173) | +Epoch 15 |+--------------+---------------------+--------------------+--------------------+ +Epoch 15 | +Epoch 15 |Statistics of the sparsified model: +Epoch 15 |+-----------------------------------------+-------+ +Epoch 15 || Statistic's name | Value | +Epoch 15 |+=========================================+=======+ +Epoch 15 || Sparsity level of the whole model | 0.581 | +Epoch 15 |+-----------------------------------------+-------+ +Epoch 15 || Sparsity level of all sparsified layers | 0.749 | +Epoch 15 |+-----------------------------------------+-------+ +Epoch 15 | +Epoch 15 |Statistics by sparsified layers: +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 15 |+======================+================+================+=====================+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.550 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[0]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[qu | | | | +Epoch 15 || ery]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.558 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[0]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[ke | | | | +Epoch 15 || y]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.555 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[0]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[va | | | | +Epoch 15 || lue]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.574 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[0]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfOutput[ou | | | | +Epoch 15 || tput]/NNCFLinear[den | | | | +Epoch 15 || se]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [3072, 768] | 0.845 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[0]/Be | | | | +Epoch 15 || rtIntermediate[inter | | | | +Epoch 15 || mediate]/NNCFLinear[ | | | | +Epoch 15 || dense]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 3072] | 0.853 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[0]/Be | | | | +Epoch 15 || rtOutput[output]/NNC | | | | +Epoch 15 || FLinear[dense]/linea | | | | +Epoch 15 || r_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.546 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[1]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[qu | | | | +Epoch 15 || ery]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.550 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[1]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[ke | | | | +Epoch 15 || y]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.555 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[1]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[va | | | | +Epoch 15 || lue]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.576 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[1]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfOutput[ou | | | | +Epoch 15 || tput]/NNCFLinear[den | | | | +Epoch 15 || se]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [3072, 768] | 0.846 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[1]/Be | | | | +Epoch 15 || rtIntermediate[inter | | | | +Epoch 15 || mediate]/NNCFLinear[ | | | | +Epoch 15 || dense]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 3072] | 0.855 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[1]/Be | | | | +Epoch 15 || rtOutput[output]/NNC | | | | +Epoch 15 || FLinear[dense]/linea | | | | +Epoch 15 || r_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.564 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[2]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[qu | | | | +Epoch 15 || ery]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.565 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[2]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[ke | | | | +Epoch 15 || y]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.559 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[2]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[va | | | | +Epoch 15 || lue]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.570 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[2]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfOutput[ou | | | | +Epoch 15 || tput]/NNCFLinear[den | | | | +Epoch 15 || se]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [3072, 768] | 0.847 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[2]/Be | | | | +Epoch 15 || rtIntermediate[inter | | | | +Epoch 15 || mediate]/NNCFLinear[ | | | | +Epoch 15 || dense]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 3072] | 0.854 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[2]/Be | | | | +Epoch 15 || rtOutput[output]/NNC | | | | +Epoch 15 || FLinear[dense]/linea | | | | +Epoch 15 || r_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.544 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[3]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[qu | | | | +Epoch 15 || ery]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.546 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[3]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[ke | | | | +Epoch 15 || y]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.556 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[3]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[va | | | | +Epoch 15 || lue]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.565 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[3]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfOutput[ou | | | | +Epoch 15 || tput]/NNCFLinear[den | | | | +Epoch 15 || se]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [3072, 768] | 0.847 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[3]/Be | | | | +Epoch 15 || rtIntermediate[inter | | | | +Epoch 15 || mediate]/NNCFLinear[ | | | | +Epoch 15 || dense]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 3072] | 0.857 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[3]/Be | | | | +Epoch 15 || rtOutput[output]/NNC | | | | +Epoch 15 || FLinear[dense]/linea | | | | +Epoch 15 || r_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.541 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[4]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[qu | | | | +Epoch 15 || ery]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.541 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[4]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[ke | | | | +Epoch 15 || y]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.549 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[4]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[va | | | | +Epoch 15 || lue]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.560 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[4]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfOutput[ou | | | | +Epoch 15 || tput]/NNCFLinear[den | | | | +Epoch 15 || se]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [3072, 768] | 0.847 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[4]/Be | | | | +Epoch 15 || rtIntermediate[inter | | | | +Epoch 15 || mediate]/NNCFLinear[ | | | | +Epoch 15 || dense]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 3072] | 0.857 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[4]/Be | | | | +Epoch 15 || rtOutput[output]/NNC | | | | +Epoch 15 || FLinear[dense]/linea | | | | +Epoch 15 || r_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.539 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[5]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[qu | | | | +Epoch 15 || ery]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.541 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[5]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[ke | | | | +Epoch 15 || y]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.553 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[5]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[va | | | | +Epoch 15 || lue]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.560 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[5]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfOutput[ou | | | | +Epoch 15 || tput]/NNCFLinear[den | | | | +Epoch 15 || se]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [3072, 768] | 0.847 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[5]/Be | | | | +Epoch 15 || rtIntermediate[inter | | | | +Epoch 15 || mediate]/NNCFLinear[ | | | | +Epoch 15 || dense]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 3072] | 0.857 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[5]/Be | | | | +Epoch 15 || rtOutput[output]/NNC | | | | +Epoch 15 || FLinear[dense]/linea | | | | +Epoch 15 || r_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.538 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[6]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[qu | | | | +Epoch 15 || ery]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.539 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[6]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[ke | | | | +Epoch 15 || y]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.553 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[6]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[va | | | | +Epoch 15 || lue]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.560 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[6]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfOutput[ou | | | | +Epoch 15 || tput]/NNCFLinear[den | | | | +Epoch 15 || se]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [3072, 768] | 0.846 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[6]/Be | | | | +Epoch 15 || rtIntermediate[inter | | | | +Epoch 15 || mediate]/NNCFLinear[ | | | | +Epoch 15 || dense]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 3072] | 0.854 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[6]/Be | | | | +Epoch 15 || rtOutput[output]/NNC | | | | +Epoch 15 || FLinear[dense]/linea | | | | +Epoch 15 || r_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.537 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[7]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[qu | | | | +Epoch 15 || ery]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.538 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[7]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[ke | | | | +Epoch 15 || y]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.547 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[7]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[va | | | | +Epoch 15 || lue]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.553 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[7]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfOutput[ou | | | | +Epoch 15 || tput]/NNCFLinear[den | | | | +Epoch 15 || se]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [3072, 768] | 0.845 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[7]/Be | | | | +Epoch 15 || rtIntermediate[inter | | | | +Epoch 15 || mediate]/NNCFLinear[ | | | | +Epoch 15 || dense]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 3072] | 0.851 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[7]/Be | | | | +Epoch 15 || rtOutput[output]/NNC | | | | +Epoch 15 || FLinear[dense]/linea | | | | +Epoch 15 || r_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.537 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[8]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[qu | | | | +Epoch 15 || ery]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.537 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[8]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[ke | | | | +Epoch 15 || y]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.541 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[8]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[va | | | | +Epoch 15 || lue]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.547 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[8]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfOutput[ou | | | | +Epoch 15 || tput]/NNCFLinear[den | | | | +Epoch 15 || se]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [3072, 768] | 0.844 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[8]/Be | | | | +Epoch 15 || rtIntermediate[inter | | | | +Epoch 15 || mediate]/NNCFLinear[ | | | | +Epoch 15 || dense]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 3072] | 0.850 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[8]/Be | | | | +Epoch 15 || rtOutput[output]/NNC | | | | +Epoch 15 || FLinear[dense]/linea | | | | +Epoch 15 || r_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.532 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[9]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[qu | | | | +Epoch 15 || ery]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.535 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[9]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[ke | | | | +Epoch 15 || y]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.539 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[9]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfAttention | | | | +Epoch 15 || [self]/NNCFLinear[va | | | | +Epoch 15 || lue]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.541 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[9]/Be | | | | +Epoch 15 || rtAttention[attentio | | | | +Epoch 15 || n]/BertSelfOutput[ou | | | | +Epoch 15 || tput]/NNCFLinear[den | | | | +Epoch 15 || se]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [3072, 768] | 0.848 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[9]/Be | | | | +Epoch 15 || rtIntermediate[inter | | | | +Epoch 15 || mediate]/NNCFLinear[ | | | | +Epoch 15 || dense]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 3072] | 0.854 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[9]/Be | | | | +Epoch 15 || rtOutput[output]/NNC | | | | +Epoch 15 || FLinear[dense]/linea | | | | +Epoch 15 || r_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.533 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[10]/B | | | | +Epoch 15 || ertAttention[attenti | | | | +Epoch 15 || on]/BertSelfAttentio | | | | +Epoch 15 || n[self]/NNCFLinear[q | | | | +Epoch 15 || uery]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.534 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[10]/B | | | | +Epoch 15 || ertAttention[attenti | | | | +Epoch 15 || on]/BertSelfAttentio | | | | +Epoch 15 || n[self]/NNCFLinear[k | | | | +Epoch 15 || ey]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.550 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[10]/B | | | | +Epoch 15 || ertAttention[attenti | | | | +Epoch 15 || on]/BertSelfAttentio | | | | +Epoch 15 || n[self]/NNCFLinear[v | | | | +Epoch 15 || alue]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.545 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[10]/B | | | | +Epoch 15 || ertAttention[attenti | | | | +Epoch 15 || on]/BertSelfOutput[o | | | | +Epoch 15 || utput]/NNCFLinear[de | | | | +Epoch 15 || nse]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [3072, 768] | 0.844 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[10]/B | | | | +Epoch 15 || ertIntermediate[inte | | | | +Epoch 15 || rmediate]/NNCFLinear | | | | +Epoch 15 || [dense]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 3072] | 0.849 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[10]/B | | | | +Epoch 15 || ertOutput[output]/NN | | | | +Epoch 15 || CFLinear[dense]/line | | | | +Epoch 15 || ar_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.537 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[11]/B | | | | +Epoch 15 || ertAttention[attenti | | | | +Epoch 15 || on]/BertSelfAttentio | | | | +Epoch 15 || n[self]/NNCFLinear[q | | | | +Epoch 15 || uery]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.533 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[11]/B | | | | +Epoch 15 || ertAttention[attenti | | | | +Epoch 15 || on]/BertSelfAttentio | | | | +Epoch 15 || n[self]/NNCFLinear[k | | | | +Epoch 15 || ey]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.538 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[11]/B | | | | +Epoch 15 || ertAttention[attenti | | | | +Epoch 15 || on]/BertSelfAttentio | | | | +Epoch 15 || n[self]/NNCFLinear[v | | | | +Epoch 15 || alue]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 768] | 0.534 | 0.694 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[11]/B | | | | +Epoch 15 || ertAttention[attenti | | | | +Epoch 15 || on]/BertSelfOutput[o | | | | +Epoch 15 || utput]/NNCFLinear[de | | | | +Epoch 15 || nse]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [3072, 768] | 0.841 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[11]/B | | | | +Epoch 15 || ertIntermediate[inte | | | | +Epoch 15 || rmediate]/NNCFLinear | | | | +Epoch 15 || [dense]/linear_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 || BertForSequenceClass | [768, 3072] | 0.843 | 2.778 | +Epoch 15 || ification/BertModel[ | | | | +Epoch 15 || bert]/BertEncoder[en | | | | +Epoch 15 || coder]/ModuleList[la | | | | +Epoch 15 || yer]/BertLayer[11]/B | | | | +Epoch 15 || ertOutput[output]/NN | | | | +Epoch 15 || CFLinear[dense]/line | | | | +Epoch 15 || ar_0 | | | | +Epoch 15 |+----------------------+----------------+----------------+---------------------+ +Epoch 15 | +Epoch 15 |Statistics of the magnitude sparsity algorithm: +Epoch 15 |+----------------------------------------------------------------------+-------+ +Epoch 15 || Statistic's name | Value | +Epoch 15 |+======================================================================+=======+ +Epoch 15 || A target level of the sparsity for the algorithm for the current | 0.749 | +Epoch 15 || epoch | | +Epoch 15 |+----------------------------------------------------------------------+-------+ +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || Layer's name | Sparsity threshold | +Epoch 15 |+=========================================================+====================+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 15 || near_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 15 || inear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 15 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 15 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 15 || near_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 15 || inear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 15 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 15 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 15 || near_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 15 || inear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 15 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 15 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 15 || near_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 15 || inear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 15 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 15 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 15 || near_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 15 || inear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 15 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 15 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 15 || near_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 15 || inear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 15 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 15 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 15 || near_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 15 || inear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 15 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 15 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 15 || near_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 15 || inear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 15 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 15 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 15 || near_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 15 || inear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 15 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 15 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 15 || near_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 15 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 15 || inear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 15 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 15 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 15 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 15 || /linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 15 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 15 || inear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 15 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 15 || /linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 15 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 15 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 15 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 15 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 15 || /linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 15 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 15 || inear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 15 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 15 || /linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 15 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 15 || linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 15 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +Epoch 15 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 15 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 15 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 15 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 16 |+--------------------------------+-------+ +Epoch 16 || Statistic's name | Value | +Epoch 16 |+================================+=======+ +Epoch 16 || Ratio of enabled quantizations | 100 | +Epoch 16 |+--------------------------------+-------+ +Epoch 16 | +Epoch 16 |Statistics of the quantization share: +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Statistic's name | Value | +Epoch 16 |+==================================+====================+ +Epoch 16 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 16 |+----------------------------------+--------------------+ +Epoch 16 | +Epoch 16 |Statistics of the bitwidth distribution: +Epoch 16 |+--------------+---------------------+--------------------+--------------------+ +Epoch 16 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 16 || | WQs | Placed AQs | Qs | +Epoch 16 |+==============+=====================+====================+====================+ +Epoch 16 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 16 || | | | 173) | +Epoch 16 |+--------------+---------------------+--------------------+--------------------+ +Epoch 16 | +Epoch 16 |Statistics of the sparsified model: +Epoch 16 |+-----------------------------------------+-------+ +Epoch 16 || Statistic's name | Value | +Epoch 16 |+=========================================+=======+ +Epoch 16 || Sparsity level of the whole model | 0.592 | +Epoch 16 |+-----------------------------------------+-------+ +Epoch 16 || Sparsity level of all sparsified layers | 0.763 | +Epoch 16 |+-----------------------------------------+-------+ +Epoch 16 | +Epoch 16 |Statistics by sparsified layers: +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 16 |+======================+================+================+=====================+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.565 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[0]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[qu | | | | +Epoch 16 || ery]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.573 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[0]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[ke | | | | +Epoch 16 || y]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.569 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[0]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[va | | | | +Epoch 16 || lue]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.589 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[0]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfOutput[ou | | | | +Epoch 16 || tput]/NNCFLinear[den | | | | +Epoch 16 || se]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [3072, 768] | 0.859 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[0]/Be | | | | +Epoch 16 || rtIntermediate[inter | | | | +Epoch 16 || mediate]/NNCFLinear[ | | | | +Epoch 16 || dense]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 3072] | 0.866 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[0]/Be | | | | +Epoch 16 || rtOutput[output]/NNC | | | | +Epoch 16 || FLinear[dense]/linea | | | | +Epoch 16 || r_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.561 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[1]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[qu | | | | +Epoch 16 || ery]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.564 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[1]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[ke | | | | +Epoch 16 || y]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.570 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[1]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[va | | | | +Epoch 16 || lue]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[1]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfOutput[ou | | | | +Epoch 16 || tput]/NNCFLinear[den | | | | +Epoch 16 || se]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [3072, 768] | 0.859 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[1]/Be | | | | +Epoch 16 || rtIntermediate[inter | | | | +Epoch 16 || mediate]/NNCFLinear[ | | | | +Epoch 16 || dense]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 3072] | 0.868 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[1]/Be | | | | +Epoch 16 || rtOutput[output]/NNC | | | | +Epoch 16 || FLinear[dense]/linea | | | | +Epoch 16 || r_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.578 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[2]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[qu | | | | +Epoch 16 || ery]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.579 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[2]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[ke | | | | +Epoch 16 || y]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.573 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[2]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[va | | | | +Epoch 16 || lue]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.585 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[2]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfOutput[ou | | | | +Epoch 16 || tput]/NNCFLinear[den | | | | +Epoch 16 || se]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [3072, 768] | 0.860 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[2]/Be | | | | +Epoch 16 || rtIntermediate[inter | | | | +Epoch 16 || mediate]/NNCFLinear[ | | | | +Epoch 16 || dense]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 3072] | 0.867 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[2]/Be | | | | +Epoch 16 || rtOutput[output]/NNC | | | | +Epoch 16 || FLinear[dense]/linea | | | | +Epoch 16 || r_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.558 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[3]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[qu | | | | +Epoch 16 || ery]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.560 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[3]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[ke | | | | +Epoch 16 || y]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.571 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[3]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[va | | | | +Epoch 16 || lue]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.579 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[3]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfOutput[ou | | | | +Epoch 16 || tput]/NNCFLinear[den | | | | +Epoch 16 || se]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [3072, 768] | 0.860 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[3]/Be | | | | +Epoch 16 || rtIntermediate[inter | | | | +Epoch 16 || mediate]/NNCFLinear[ | | | | +Epoch 16 || dense]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 3072] | 0.870 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[3]/Be | | | | +Epoch 16 || rtOutput[output]/NNC | | | | +Epoch 16 || FLinear[dense]/linea | | | | +Epoch 16 || r_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.555 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[4]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[qu | | | | +Epoch 16 || ery]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.555 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[4]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[ke | | | | +Epoch 16 || y]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.563 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[4]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[va | | | | +Epoch 16 || lue]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.575 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[4]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfOutput[ou | | | | +Epoch 16 || tput]/NNCFLinear[den | | | | +Epoch 16 || se]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [3072, 768] | 0.860 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[4]/Be | | | | +Epoch 16 || rtIntermediate[inter | | | | +Epoch 16 || mediate]/NNCFLinear[ | | | | +Epoch 16 || dense]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 3072] | 0.870 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[4]/Be | | | | +Epoch 16 || rtOutput[output]/NNC | | | | +Epoch 16 || FLinear[dense]/linea | | | | +Epoch 16 || r_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.554 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[5]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[qu | | | | +Epoch 16 || ery]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.555 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[5]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[ke | | | | +Epoch 16 || y]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.568 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[5]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[va | | | | +Epoch 16 || lue]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.574 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[5]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfOutput[ou | | | | +Epoch 16 || tput]/NNCFLinear[den | | | | +Epoch 16 || se]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [3072, 768] | 0.860 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[5]/Be | | | | +Epoch 16 || rtIntermediate[inter | | | | +Epoch 16 || mediate]/NNCFLinear[ | | | | +Epoch 16 || dense]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 3072] | 0.869 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[5]/Be | | | | +Epoch 16 || rtOutput[output]/NNC | | | | +Epoch 16 || FLinear[dense]/linea | | | | +Epoch 16 || r_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.552 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[6]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[qu | | | | +Epoch 16 || ery]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.553 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[6]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[ke | | | | +Epoch 16 || y]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.567 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[6]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[va | | | | +Epoch 16 || lue]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.574 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[6]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfOutput[ou | | | | +Epoch 16 || tput]/NNCFLinear[den | | | | +Epoch 16 || se]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [3072, 768] | 0.860 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[6]/Be | | | | +Epoch 16 || rtIntermediate[inter | | | | +Epoch 16 || mediate]/NNCFLinear[ | | | | +Epoch 16 || dense]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 3072] | 0.867 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[6]/Be | | | | +Epoch 16 || rtOutput[output]/NNC | | | | +Epoch 16 || FLinear[dense]/linea | | | | +Epoch 16 || r_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.552 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[7]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[qu | | | | +Epoch 16 || ery]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.552 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[7]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[ke | | | | +Epoch 16 || y]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.562 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[7]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[va | | | | +Epoch 16 || lue]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.568 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[7]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfOutput[ou | | | | +Epoch 16 || tput]/NNCFLinear[den | | | | +Epoch 16 || se]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [3072, 768] | 0.859 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[7]/Be | | | | +Epoch 16 || rtIntermediate[inter | | | | +Epoch 16 || mediate]/NNCFLinear[ | | | | +Epoch 16 || dense]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 3072] | 0.864 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[7]/Be | | | | +Epoch 16 || rtOutput[output]/NNC | | | | +Epoch 16 || FLinear[dense]/linea | | | | +Epoch 16 || r_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.551 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[8]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[qu | | | | +Epoch 16 || ery]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.552 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[8]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[ke | | | | +Epoch 16 || y]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.555 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[8]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[va | | | | +Epoch 16 || lue]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.562 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[8]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfOutput[ou | | | | +Epoch 16 || tput]/NNCFLinear[den | | | | +Epoch 16 || se]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [3072, 768] | 0.858 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[8]/Be | | | | +Epoch 16 || rtIntermediate[inter | | | | +Epoch 16 || mediate]/NNCFLinear[ | | | | +Epoch 16 || dense]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 3072] | 0.864 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[8]/Be | | | | +Epoch 16 || rtOutput[output]/NNC | | | | +Epoch 16 || FLinear[dense]/linea | | | | +Epoch 16 || r_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.548 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[9]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[qu | | | | +Epoch 16 || ery]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.550 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[9]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[ke | | | | +Epoch 16 || y]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.554 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[9]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfAttention | | | | +Epoch 16 || [self]/NNCFLinear[va | | | | +Epoch 16 || lue]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.556 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[9]/Be | | | | +Epoch 16 || rtAttention[attentio | | | | +Epoch 16 || n]/BertSelfOutput[ou | | | | +Epoch 16 || tput]/NNCFLinear[den | | | | +Epoch 16 || se]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [3072, 768] | 0.862 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[9]/Be | | | | +Epoch 16 || rtIntermediate[inter | | | | +Epoch 16 || mediate]/NNCFLinear[ | | | | +Epoch 16 || dense]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 3072] | 0.868 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[9]/Be | | | | +Epoch 16 || rtOutput[output]/NNC | | | | +Epoch 16 || FLinear[dense]/linea | | | | +Epoch 16 || r_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.549 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[10]/B | | | | +Epoch 16 || ertAttention[attenti | | | | +Epoch 16 || on]/BertSelfAttentio | | | | +Epoch 16 || n[self]/NNCFLinear[q | | | | +Epoch 16 || uery]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.549 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[10]/B | | | | +Epoch 16 || ertAttention[attenti | | | | +Epoch 16 || on]/BertSelfAttentio | | | | +Epoch 16 || n[self]/NNCFLinear[k | | | | +Epoch 16 || ey]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.565 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[10]/B | | | | +Epoch 16 || ertAttention[attenti | | | | +Epoch 16 || on]/BertSelfAttentio | | | | +Epoch 16 || n[self]/NNCFLinear[v | | | | +Epoch 16 || alue]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.560 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[10]/B | | | | +Epoch 16 || ertAttention[attenti | | | | +Epoch 16 || on]/BertSelfOutput[o | | | | +Epoch 16 || utput]/NNCFLinear[de | | | | +Epoch 16 || nse]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [3072, 768] | 0.859 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[10]/B | | | | +Epoch 16 || ertIntermediate[inte | | | | +Epoch 16 || rmediate]/NNCFLinear | | | | +Epoch 16 || [dense]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 3072] | 0.864 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[10]/B | | | | +Epoch 16 || ertOutput[output]/NN | | | | +Epoch 16 || CFLinear[dense]/line | | | | +Epoch 16 || ar_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.551 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[11]/B | | | | +Epoch 16 || ertAttention[attenti | | | | +Epoch 16 || on]/BertSelfAttentio | | | | +Epoch 16 || n[self]/NNCFLinear[q | | | | +Epoch 16 || uery]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.547 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[11]/B | | | | +Epoch 16 || ertAttention[attenti | | | | +Epoch 16 || on]/BertSelfAttentio | | | | +Epoch 16 || n[self]/NNCFLinear[k | | | | +Epoch 16 || ey]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.552 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[11]/B | | | | +Epoch 16 || ertAttention[attenti | | | | +Epoch 16 || on]/BertSelfAttentio | | | | +Epoch 16 || n[self]/NNCFLinear[v | | | | +Epoch 16 || alue]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 768] | 0.549 | 0.694 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[11]/B | | | | +Epoch 16 || ertAttention[attenti | | | | +Epoch 16 || on]/BertSelfOutput[o | | | | +Epoch 16 || utput]/NNCFLinear[de | | | | +Epoch 16 || nse]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [3072, 768] | 0.857 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[11]/B | | | | +Epoch 16 || ertIntermediate[inte | | | | +Epoch 16 || rmediate]/NNCFLinear | | | | +Epoch 16 || [dense]/linear_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 || BertForSequenceClass | [768, 3072] | 0.859 | 2.778 | +Epoch 16 || ification/BertModel[ | | | | +Epoch 16 || bert]/BertEncoder[en | | | | +Epoch 16 || coder]/ModuleList[la | | | | +Epoch 16 || yer]/BertLayer[11]/B | | | | +Epoch 16 || ertOutput[output]/NN | | | | +Epoch 16 || CFLinear[dense]/line | | | | +Epoch 16 || ar_0 | | | | +Epoch 16 |+----------------------+----------------+----------------+---------------------+ +Epoch 16 | +Epoch 16 |Statistics of the magnitude sparsity algorithm: +Epoch 16 |+----------------------------------------------------------------------+-------+ +Epoch 16 || Statistic's name | Value | +Epoch 16 |+======================================================================+=======+ +Epoch 16 || A target level of the sparsity for the algorithm for the current | 0.763 | +Epoch 16 || epoch | | +Epoch 16 |+----------------------------------------------------------------------+-------+ +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || Layer's name | Sparsity threshold | +Epoch 16 |+=========================================================+====================+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 16 || near_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 16 || inear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 16 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 16 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 16 || near_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 16 || inear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 16 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 16 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 16 || near_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 16 || inear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 16 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 16 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 16 || near_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 16 || inear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 16 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 16 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 16 || near_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 16 || inear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 16 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 16 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 16 || near_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 16 || inear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 16 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 16 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 16 || near_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 16 || inear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 16 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 16 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 16 || near_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 16 || inear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 16 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 16 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 16 || near_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 16 || inear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 16 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 16 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 16 || near_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 16 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 16 || inear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 16 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 16 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 16 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 16 || /linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 16 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 16 || inear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 16 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 16 || /linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 16 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 16 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 16 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 16 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 16 || /linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 16 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 16 || inear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 16 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 16 || /linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 16 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 16 || linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 16 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +Epoch 16 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 16 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 16 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 16 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 17 |+--------------------------------+-------+ +Epoch 17 || Statistic's name | Value | +Epoch 17 |+================================+=======+ +Epoch 17 || Ratio of enabled quantizations | 100 | +Epoch 17 |+--------------------------------+-------+ +Epoch 17 | +Epoch 17 |Statistics of the quantization share: +Epoch 17 |+----------------------------------+--------------------+ +Epoch 17 || Statistic's name | Value | +Epoch 17 |+==================================+====================+ +Epoch 17 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 17 |+----------------------------------+--------------------+ +Epoch 17 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 17 |+----------------------------------+--------------------+ +Epoch 17 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 17 |+----------------------------------+--------------------+ +Epoch 17 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 17 |+----------------------------------+--------------------+ +Epoch 17 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 17 |+----------------------------------+--------------------+ +Epoch 17 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 17 |+----------------------------------+--------------------+ +Epoch 17 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 17 |+----------------------------------+--------------------+ +Epoch 17 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 17 |+----------------------------------+--------------------+ +Epoch 17 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 17 |+----------------------------------+--------------------+ +Epoch 17 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 17 |+----------------------------------+--------------------+ +Epoch 17 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 17 |+----------------------------------+--------------------+ +Epoch 17 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 17 |+----------------------------------+--------------------+ +Epoch 17 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 17 |+----------------------------------+--------------------+ +Epoch 17 | +Epoch 17 |Statistics of the bitwidth distribution: +Epoch 17 |+--------------+---------------------+--------------------+--------------------+ +Epoch 17 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 17 || | WQs | Placed AQs | Qs | +Epoch 17 |+==============+=====================+====================+====================+ +Epoch 17 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 17 || | | | 173) | +Epoch 17 |+--------------+---------------------+--------------------+--------------------+ +Epoch 17 | +Epoch 17 |Statistics of the sparsified model: +Epoch 17 |+-----------------------------------------+-------+ +Epoch 17 || Statistic's name | Value | +Epoch 17 |+=========================================+=======+ +Epoch 17 || Sparsity level of the whole model | 0.600 | +Epoch 17 |+-----------------------------------------+-------+ +Epoch 17 || Sparsity level of all sparsified layers | 0.774 | +Epoch 17 |+-----------------------------------------+-------+ +Epoch 17 | +Epoch 17 |Statistics by sparsified layers: +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 17 |+======================+================+================+=====================+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.574 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[0]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[qu | | | | +Epoch 17 || ery]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.582 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[0]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[ke | | | | +Epoch 17 || y]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.577 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[0]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[va | | | | +Epoch 17 || lue]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[0]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfOutput[ou | | | | +Epoch 17 || tput]/NNCFLinear[den | | | | +Epoch 17 || se]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [3072, 768] | 0.870 | 2.778 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[0]/Be | | | | +Epoch 17 || rtIntermediate[inter | | | | +Epoch 17 || mediate]/NNCFLinear[ | | | | +Epoch 17 || dense]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 3072] | 0.877 | 2.778 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[0]/Be | | | | +Epoch 17 || rtOutput[output]/NNC | | | | +Epoch 17 || FLinear[dense]/linea | | | | +Epoch 17 || r_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.570 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[1]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[qu | | | | +Epoch 17 || ery]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.574 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[1]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[ke | | | | +Epoch 17 || y]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.577 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[1]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[va | | | | +Epoch 17 || lue]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.598 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[1]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfOutput[ou | | | | +Epoch 17 || tput]/NNCFLinear[den | | | | +Epoch 17 || se]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [3072, 768] | 0.871 | 2.778 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[1]/Be | | | | +Epoch 17 || rtIntermediate[inter | | | | +Epoch 17 || mediate]/NNCFLinear[ | | | | +Epoch 17 || dense]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 3072] | 0.880 | 2.778 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[1]/Be | | | | +Epoch 17 || rtOutput[output]/NNC | | | | +Epoch 17 || FLinear[dense]/linea | | | | +Epoch 17 || r_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.589 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[2]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[qu | | | | +Epoch 17 || ery]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[2]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[ke | | | | +Epoch 17 || y]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.580 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[2]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[va | | | | +Epoch 17 || lue]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[2]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfOutput[ou | | | | +Epoch 17 || tput]/NNCFLinear[den | | | | +Epoch 17 || se]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [3072, 768] | 0.872 | 2.778 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[2]/Be | | | | +Epoch 17 || rtIntermediate[inter | | | | +Epoch 17 || mediate]/NNCFLinear[ | | | | +Epoch 17 || dense]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 3072] | 0.878 | 2.778 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[2]/Be | | | | +Epoch 17 || rtOutput[output]/NNC | | | | +Epoch 17 || FLinear[dense]/linea | | | | +Epoch 17 || r_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.568 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[3]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[qu | | | | +Epoch 17 || ery]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.569 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[3]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[ke | | | | +Epoch 17 || y]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.578 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[3]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[va | | | | +Epoch 17 || lue]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.587 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[3]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfOutput[ou | | | | +Epoch 17 || tput]/NNCFLinear[den | | | | +Epoch 17 || se]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [3072, 768] | 0.872 | 2.778 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[3]/Be | | | | +Epoch 17 || rtIntermediate[inter | | | | +Epoch 17 || mediate]/NNCFLinear[ | | | | +Epoch 17 || dense]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 3072] | 0.881 | 2.778 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[3]/Be | | | | +Epoch 17 || rtOutput[output]/NNC | | | | +Epoch 17 || FLinear[dense]/linea | | | | +Epoch 17 || r_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.564 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[4]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[qu | | | | +Epoch 17 || ery]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.564 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[4]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[ke | | | | +Epoch 17 || y]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.571 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[4]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[va | | | | +Epoch 17 || lue]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.583 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[4]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfOutput[ou | | | | +Epoch 17 || tput]/NNCFLinear[den | | | | +Epoch 17 || se]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [3072, 768] | 0.872 | 2.778 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[4]/Be | | | | +Epoch 17 || rtIntermediate[inter | | | | +Epoch 17 || mediate]/NNCFLinear[ | | | | +Epoch 17 || dense]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 3072] | 0.881 | 2.778 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[4]/Be | | | | +Epoch 17 || rtOutput[output]/NNC | | | | +Epoch 17 || FLinear[dense]/linea | | | | +Epoch 17 || r_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.564 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[5]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[qu | | | | +Epoch 17 || ery]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.565 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[5]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[ke | | | | +Epoch 17 || y]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.577 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[5]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[va | | | | +Epoch 17 || lue]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.582 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[5]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfOutput[ou | | | | +Epoch 17 || tput]/NNCFLinear[den | | | | +Epoch 17 || se]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [3072, 768] | 0.872 | 2.778 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[5]/Be | | | | +Epoch 17 || rtIntermediate[inter | | | | +Epoch 17 || mediate]/NNCFLinear[ | | | | +Epoch 17 || dense]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 3072] | 0.880 | 2.778 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[5]/Be | | | | +Epoch 17 || rtOutput[output]/NNC | | | | +Epoch 17 || FLinear[dense]/linea | | | | +Epoch 17 || r_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.562 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[6]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[qu | | | | +Epoch 17 || ery]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.562 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[6]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[ke | | | | +Epoch 17 || y]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.575 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[6]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[va | | | | +Epoch 17 || lue]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.582 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[6]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfOutput[ou | | | | +Epoch 17 || tput]/NNCFLinear[den | | | | +Epoch 17 || se]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [3072, 768] | 0.872 | 2.778 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[6]/Be | | | | +Epoch 17 || rtIntermediate[inter | | | | +Epoch 17 || mediate]/NNCFLinear[ | | | | +Epoch 17 || dense]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 3072] | 0.879 | 2.778 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[6]/Be | | | | +Epoch 17 || rtOutput[output]/NNC | | | | +Epoch 17 || FLinear[dense]/linea | | | | +Epoch 17 || r_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.562 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[7]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[qu | | | | +Epoch 17 || ery]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.562 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[7]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[ke | | | | +Epoch 17 || y]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.570 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[7]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[va | | | | +Epoch 17 || lue]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.576 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[7]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfOutput[ou | | | | +Epoch 17 || tput]/NNCFLinear[den | | | | +Epoch 17 || se]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [3072, 768] | 0.871 | 2.778 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[7]/Be | | | | +Epoch 17 || rtIntermediate[inter | | | | +Epoch 17 || mediate]/NNCFLinear[ | | | | +Epoch 17 || dense]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 3072] | 0.876 | 2.778 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[7]/Be | | | | +Epoch 17 || rtOutput[output]/NNC | | | | +Epoch 17 || FLinear[dense]/linea | | | | +Epoch 17 || r_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.562 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[8]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[qu | | | | +Epoch 17 || ery]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.562 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[8]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[ke | | | | +Epoch 17 || y]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.565 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[8]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[va | | | | +Epoch 17 || lue]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.571 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[8]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfOutput[ou | | | | +Epoch 17 || tput]/NNCFLinear[den | | | | +Epoch 17 || se]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [3072, 768] | 0.870 | 2.778 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[8]/Be | | | | +Epoch 17 || rtIntermediate[inter | | | | +Epoch 17 || mediate]/NNCFLinear[ | | | | +Epoch 17 || dense]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 3072] | 0.876 | 2.778 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[8]/Be | | | | +Epoch 17 || rtOutput[output]/NNC | | | | +Epoch 17 || FLinear[dense]/linea | | | | +Epoch 17 || r_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.558 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[9]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[qu | | | | +Epoch 17 || ery]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.560 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[9]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[ke | | | | +Epoch 17 || y]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.563 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[9]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfAttention | | | | +Epoch 17 || [self]/NNCFLinear[va | | | | +Epoch 17 || lue]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.565 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[9]/Be | | | | +Epoch 17 || rtAttention[attentio | | | | +Epoch 17 || n]/BertSelfOutput[ou | | | | +Epoch 17 || tput]/NNCFLinear[den | | | | +Epoch 17 || se]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [3072, 768] | 0.875 | 2.778 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[9]/Be | | | | +Epoch 17 || rtIntermediate[inter | | | | +Epoch 17 || mediate]/NNCFLinear[ | | | | +Epoch 17 || dense]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 3072] | 0.880 | 2.778 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[9]/Be | | | | +Epoch 17 || rtOutput[output]/NNC | | | | +Epoch 17 || FLinear[dense]/linea | | | | +Epoch 17 || r_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.560 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[10]/B | | | | +Epoch 17 || ertAttention[attenti | | | | +Epoch 17 || on]/BertSelfAttentio | | | | +Epoch 17 || n[self]/NNCFLinear[q | | | | +Epoch 17 || uery]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.560 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[10]/B | | | | +Epoch 17 || ertAttention[attenti | | | | +Epoch 17 || on]/BertSelfAttentio | | | | +Epoch 17 || n[self]/NNCFLinear[k | | | | +Epoch 17 || ey]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.574 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[10]/B | | | | +Epoch 17 || ertAttention[attenti | | | | +Epoch 17 || on]/BertSelfAttentio | | | | +Epoch 17 || n[self]/NNCFLinear[v | | | | +Epoch 17 || alue]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.569 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[10]/B | | | | +Epoch 17 || ertAttention[attenti | | | | +Epoch 17 || on]/BertSelfOutput[o | | | | +Epoch 17 || utput]/NNCFLinear[de | | | | +Epoch 17 || nse]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [3072, 768] | 0.871 | 2.778 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[10]/B | | | | +Epoch 17 || ertIntermediate[inte | | | | +Epoch 17 || rmediate]/NNCFLinear | | | | +Epoch 17 || [dense]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 3072] | 0.876 | 2.778 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[10]/B | | | | +Epoch 17 || ertOutput[output]/NN | | | | +Epoch 17 || CFLinear[dense]/line | | | | +Epoch 17 || ar_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.563 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[11]/B | | | | +Epoch 17 || ertAttention[attenti | | | | +Epoch 17 || on]/BertSelfAttentio | | | | +Epoch 17 || n[self]/NNCFLinear[q | | | | +Epoch 17 || uery]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.558 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[11]/B | | | | +Epoch 17 || ertAttention[attenti | | | | +Epoch 17 || on]/BertSelfAttentio | | | | +Epoch 17 || n[self]/NNCFLinear[k | | | | +Epoch 17 || ey]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.564 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[11]/B | | | | +Epoch 17 || ertAttention[attenti | | | | +Epoch 17 || on]/BertSelfAttentio | | | | +Epoch 17 || n[self]/NNCFLinear[v | | | | +Epoch 17 || alue]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 768] | 0.560 | 0.694 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[11]/B | | | | +Epoch 17 || ertAttention[attenti | | | | +Epoch 17 || on]/BertSelfOutput[o | | | | +Epoch 17 || utput]/NNCFLinear[de | | | | +Epoch 17 || nse]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [3072, 768] | 0.870 | 2.778 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[11]/B | | | | +Epoch 17 || ertIntermediate[inte | | | | +Epoch 17 || rmediate]/NNCFLinear | | | | +Epoch 17 || [dense]/linear_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 || BertForSequenceClass | [768, 3072] | 0.873 | 2.778 | +Epoch 17 || ification/BertModel[ | | | | +Epoch 17 || bert]/BertEncoder[en | | | | +Epoch 17 || coder]/ModuleList[la | | | | +Epoch 17 || yer]/BertLayer[11]/B | | | | +Epoch 17 || ertOutput[output]/NN | | | | +Epoch 17 || CFLinear[dense]/line | | | | +Epoch 17 || ar_0 | | | | +Epoch 17 |+----------------------+----------------+----------------+---------------------+ +Epoch 17 | +Epoch 17 |Statistics of the magnitude sparsity algorithm: +Epoch 17 |+----------------------------------------------------------------------+-------+ +Epoch 17 || Statistic's name | Value | +Epoch 17 |+======================================================================+=======+ +Epoch 17 || A target level of the sparsity for the algorithm for the current | 0.774 | +Epoch 17 || epoch | | +Epoch 17 |+----------------------------------------------------------------------+-------+ +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || Layer's name | Sparsity threshold | +Epoch 17 |+=========================================================+====================+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 17 || linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 17 || near_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 17 || linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 17 || inear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 17 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 17 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 17 || linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 17 || near_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 17 || linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 17 || inear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 17 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 17 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 17 || linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 17 || near_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 17 || linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 17 || inear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 17 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 17 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 17 || linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 17 || near_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 17 || linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 17 || inear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 17 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 17 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 17 || linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 17 || near_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 17 || linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 17 || inear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 17 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 17 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 17 || linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 17 || near_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 17 || linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 17 || inear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 17 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 17 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 17 || linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 17 || near_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 17 || linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 17 || inear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 17 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 17 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 17 || linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 17 || near_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 17 || linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 17 || inear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 17 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 17 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 17 || linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 17 || near_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 17 || linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 17 || inear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 17 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 17 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 17 || linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 17 || near_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 17 || linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 17 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 17 || inear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 17 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 17 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 17 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 17 || /linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 17 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 17 || inear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 17 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 17 || /linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 17 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 17 || linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 17 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 17 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 17 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 17 || /linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 17 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 17 || inear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 17 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 17 || /linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 17 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 17 || linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 17 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +Epoch 17 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 17 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 17 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 17 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 18 |+--------------------------------+-------+ +Epoch 18 || Statistic's name | Value | +Epoch 18 |+================================+=======+ +Epoch 18 || Ratio of enabled quantizations | 100 | +Epoch 18 |+--------------------------------+-------+ +Epoch 18 | +Epoch 18 |Statistics of the quantization share: +Epoch 18 |+----------------------------------+--------------------+ +Epoch 18 || Statistic's name | Value | +Epoch 18 |+==================================+====================+ +Epoch 18 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 18 |+----------------------------------+--------------------+ +Epoch 18 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 18 |+----------------------------------+--------------------+ +Epoch 18 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 18 |+----------------------------------+--------------------+ +Epoch 18 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 18 |+----------------------------------+--------------------+ +Epoch 18 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 18 |+----------------------------------+--------------------+ +Epoch 18 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 18 |+----------------------------------+--------------------+ +Epoch 18 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 18 |+----------------------------------+--------------------+ +Epoch 18 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 18 |+----------------------------------+--------------------+ +Epoch 18 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 18 |+----------------------------------+--------------------+ +Epoch 18 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 18 |+----------------------------------+--------------------+ +Epoch 18 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 18 |+----------------------------------+--------------------+ +Epoch 18 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 18 |+----------------------------------+--------------------+ +Epoch 18 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 18 |+----------------------------------+--------------------+ +Epoch 18 | +Epoch 18 |Statistics of the bitwidth distribution: +Epoch 18 |+--------------+---------------------+--------------------+--------------------+ +Epoch 18 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 18 || | WQs | Placed AQs | Qs | +Epoch 18 |+==============+=====================+====================+====================+ +Epoch 18 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 18 || | | | 173) | +Epoch 18 |+--------------+---------------------+--------------------+--------------------+ +Epoch 18 | +Epoch 18 |Statistics of the sparsified model: +Epoch 18 |+-----------------------------------------+-------+ +Epoch 18 || Statistic's name | Value | +Epoch 18 |+=========================================+=======+ +Epoch 18 || Sparsity level of the whole model | 0.607 | +Epoch 18 |+-----------------------------------------+-------+ +Epoch 18 || Sparsity level of all sparsified layers | 0.782 | +Epoch 18 |+-----------------------------------------+-------+ +Epoch 18 | +Epoch 18 |Statistics by sparsified layers: +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 18 |+======================+================+================+=====================+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.583 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[0]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[qu | | | | +Epoch 18 || ery]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[0]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[ke | | | | +Epoch 18 || y]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.582 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[0]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[va | | | | +Epoch 18 || lue]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.602 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[0]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfOutput[ou | | | | +Epoch 18 || tput]/NNCFLinear[den | | | | +Epoch 18 || se]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [3072, 768] | 0.879 | 2.778 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[0]/Be | | | | +Epoch 18 || rtIntermediate[inter | | | | +Epoch 18 || mediate]/NNCFLinear[ | | | | +Epoch 18 || dense]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 3072] | 0.886 | 2.778 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[0]/Be | | | | +Epoch 18 || rtOutput[output]/NNC | | | | +Epoch 18 || FLinear[dense]/linea | | | | +Epoch 18 || r_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.579 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[1]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[qu | | | | +Epoch 18 || ery]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.582 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[1]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[ke | | | | +Epoch 18 || y]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.582 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[1]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[va | | | | +Epoch 18 || lue]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[1]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfOutput[ou | | | | +Epoch 18 || tput]/NNCFLinear[den | | | | +Epoch 18 || se]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [3072, 768] | 0.880 | 2.778 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[1]/Be | | | | +Epoch 18 || rtIntermediate[inter | | | | +Epoch 18 || mediate]/NNCFLinear[ | | | | +Epoch 18 || dense]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 3072] | 0.888 | 2.778 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[1]/Be | | | | +Epoch 18 || rtOutput[output]/NNC | | | | +Epoch 18 || FLinear[dense]/linea | | | | +Epoch 18 || r_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.598 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[2]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[qu | | | | +Epoch 18 || ery]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[2]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[ke | | | | +Epoch 18 || y]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.586 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[2]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[va | | | | +Epoch 18 || lue]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[2]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfOutput[ou | | | | +Epoch 18 || tput]/NNCFLinear[den | | | | +Epoch 18 || se]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [3072, 768] | 0.881 | 2.778 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[2]/Be | | | | +Epoch 18 || rtIntermediate[inter | | | | +Epoch 18 || mediate]/NNCFLinear[ | | | | +Epoch 18 || dense]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 3072] | 0.887 | 2.778 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[2]/Be | | | | +Epoch 18 || rtOutput[output]/NNC | | | | +Epoch 18 || FLinear[dense]/linea | | | | +Epoch 18 || r_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.576 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[3]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[qu | | | | +Epoch 18 || ery]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.578 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[3]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[ke | | | | +Epoch 18 || y]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.585 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[3]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[va | | | | +Epoch 18 || lue]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[3]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfOutput[ou | | | | +Epoch 18 || tput]/NNCFLinear[den | | | | +Epoch 18 || se]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [3072, 768] | 0.882 | 2.778 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[3]/Be | | | | +Epoch 18 || rtIntermediate[inter | | | | +Epoch 18 || mediate]/NNCFLinear[ | | | | +Epoch 18 || dense]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 3072] | 0.890 | 2.778 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[3]/Be | | | | +Epoch 18 || rtOutput[output]/NNC | | | | +Epoch 18 || FLinear[dense]/linea | | | | +Epoch 18 || r_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.573 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[4]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[qu | | | | +Epoch 18 || ery]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.573 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[4]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[ke | | | | +Epoch 18 || y]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.579 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[4]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[va | | | | +Epoch 18 || lue]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[4]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfOutput[ou | | | | +Epoch 18 || tput]/NNCFLinear[den | | | | +Epoch 18 || se]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [3072, 768] | 0.881 | 2.778 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[4]/Be | | | | +Epoch 18 || rtIntermediate[inter | | | | +Epoch 18 || mediate]/NNCFLinear[ | | | | +Epoch 18 || dense]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 3072] | 0.889 | 2.778 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[4]/Be | | | | +Epoch 18 || rtOutput[output]/NNC | | | | +Epoch 18 || FLinear[dense]/linea | | | | +Epoch 18 || r_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.572 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[5]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[qu | | | | +Epoch 18 || ery]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.574 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[5]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[ke | | | | +Epoch 18 || y]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.584 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[5]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[va | | | | +Epoch 18 || lue]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.589 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[5]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfOutput[ou | | | | +Epoch 18 || tput]/NNCFLinear[den | | | | +Epoch 18 || se]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [3072, 768] | 0.881 | 2.778 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[5]/Be | | | | +Epoch 18 || rtIntermediate[inter | | | | +Epoch 18 || mediate]/NNCFLinear[ | | | | +Epoch 18 || dense]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 3072] | 0.889 | 2.778 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[5]/Be | | | | +Epoch 18 || rtOutput[output]/NNC | | | | +Epoch 18 || FLinear[dense]/linea | | | | +Epoch 18 || r_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.570 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[6]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[qu | | | | +Epoch 18 || ery]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.571 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[6]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[ke | | | | +Epoch 18 || y]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.583 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[6]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[va | | | | +Epoch 18 || lue]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.589 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[6]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfOutput[ou | | | | +Epoch 18 || tput]/NNCFLinear[den | | | | +Epoch 18 || se]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [3072, 768] | 0.881 | 2.778 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[6]/Be | | | | +Epoch 18 || rtIntermediate[inter | | | | +Epoch 18 || mediate]/NNCFLinear[ | | | | +Epoch 18 || dense]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 3072] | 0.887 | 2.778 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[6]/Be | | | | +Epoch 18 || rtOutput[output]/NNC | | | | +Epoch 18 || FLinear[dense]/linea | | | | +Epoch 18 || r_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.570 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[7]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[qu | | | | +Epoch 18 || ery]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.571 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[7]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[ke | | | | +Epoch 18 || y]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.578 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[7]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[va | | | | +Epoch 18 || lue]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.583 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[7]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfOutput[ou | | | | +Epoch 18 || tput]/NNCFLinear[den | | | | +Epoch 18 || se]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [3072, 768] | 0.880 | 2.778 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[7]/Be | | | | +Epoch 18 || rtIntermediate[inter | | | | +Epoch 18 || mediate]/NNCFLinear[ | | | | +Epoch 18 || dense]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 3072] | 0.885 | 2.778 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[7]/Be | | | | +Epoch 18 || rtOutput[output]/NNC | | | | +Epoch 18 || FLinear[dense]/linea | | | | +Epoch 18 || r_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.571 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[8]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[qu | | | | +Epoch 18 || ery]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.571 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[8]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[ke | | | | +Epoch 18 || y]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.573 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[8]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[va | | | | +Epoch 18 || lue]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.578 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[8]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfOutput[ou | | | | +Epoch 18 || tput]/NNCFLinear[den | | | | +Epoch 18 || se]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [3072, 768] | 0.880 | 2.778 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[8]/Be | | | | +Epoch 18 || rtIntermediate[inter | | | | +Epoch 18 || mediate]/NNCFLinear[ | | | | +Epoch 18 || dense]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 3072] | 0.885 | 2.778 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[8]/Be | | | | +Epoch 18 || rtOutput[output]/NNC | | | | +Epoch 18 || FLinear[dense]/linea | | | | +Epoch 18 || r_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.567 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[9]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[qu | | | | +Epoch 18 || ery]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.570 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[9]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[ke | | | | +Epoch 18 || y]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.572 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[9]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfAttention | | | | +Epoch 18 || [self]/NNCFLinear[va | | | | +Epoch 18 || lue]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.572 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[9]/Be | | | | +Epoch 18 || rtAttention[attentio | | | | +Epoch 18 || n]/BertSelfOutput[ou | | | | +Epoch 18 || tput]/NNCFLinear[den | | | | +Epoch 18 || se]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [3072, 768] | 0.884 | 2.778 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[9]/Be | | | | +Epoch 18 || rtIntermediate[inter | | | | +Epoch 18 || mediate]/NNCFLinear[ | | | | +Epoch 18 || dense]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 3072] | 0.889 | 2.778 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[9]/Be | | | | +Epoch 18 || rtOutput[output]/NNC | | | | +Epoch 18 || FLinear[dense]/linea | | | | +Epoch 18 || r_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.569 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[10]/B | | | | +Epoch 18 || ertAttention[attenti | | | | +Epoch 18 || on]/BertSelfAttentio | | | | +Epoch 18 || n[self]/NNCFLinear[q | | | | +Epoch 18 || uery]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.569 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[10]/B | | | | +Epoch 18 || ertAttention[attenti | | | | +Epoch 18 || on]/BertSelfAttentio | | | | +Epoch 18 || n[self]/NNCFLinear[k | | | | +Epoch 18 || ey]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.583 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[10]/B | | | | +Epoch 18 || ertAttention[attenti | | | | +Epoch 18 || on]/BertSelfAttentio | | | | +Epoch 18 || n[self]/NNCFLinear[v | | | | +Epoch 18 || alue]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.578 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[10]/B | | | | +Epoch 18 || ertAttention[attenti | | | | +Epoch 18 || on]/BertSelfOutput[o | | | | +Epoch 18 || utput]/NNCFLinear[de | | | | +Epoch 18 || nse]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [3072, 768] | 0.881 | 2.778 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[10]/B | | | | +Epoch 18 || ertIntermediate[inte | | | | +Epoch 18 || rmediate]/NNCFLinear | | | | +Epoch 18 || [dense]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 3072] | 0.885 | 2.778 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[10]/B | | | | +Epoch 18 || ertOutput[output]/NN | | | | +Epoch 18 || CFLinear[dense]/line | | | | +Epoch 18 || ar_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.572 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[11]/B | | | | +Epoch 18 || ertAttention[attenti | | | | +Epoch 18 || on]/BertSelfAttentio | | | | +Epoch 18 || n[self]/NNCFLinear[q | | | | +Epoch 18 || uery]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.568 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[11]/B | | | | +Epoch 18 || ertAttention[attenti | | | | +Epoch 18 || on]/BertSelfAttentio | | | | +Epoch 18 || n[self]/NNCFLinear[k | | | | +Epoch 18 || ey]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.573 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[11]/B | | | | +Epoch 18 || ertAttention[attenti | | | | +Epoch 18 || on]/BertSelfAttentio | | | | +Epoch 18 || n[self]/NNCFLinear[v | | | | +Epoch 18 || alue]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 768] | 0.569 | 0.694 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[11]/B | | | | +Epoch 18 || ertAttention[attenti | | | | +Epoch 18 || on]/BertSelfOutput[o | | | | +Epoch 18 || utput]/NNCFLinear[de | | | | +Epoch 18 || nse]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [3072, 768] | 0.880 | 2.778 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[11]/B | | | | +Epoch 18 || ertIntermediate[inte | | | | +Epoch 18 || rmediate]/NNCFLinear | | | | +Epoch 18 || [dense]/linear_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 || BertForSequenceClass | [768, 3072] | 0.882 | 2.778 | +Epoch 18 || ification/BertModel[ | | | | +Epoch 18 || bert]/BertEncoder[en | | | | +Epoch 18 || coder]/ModuleList[la | | | | +Epoch 18 || yer]/BertLayer[11]/B | | | | +Epoch 18 || ertOutput[output]/NN | | | | +Epoch 18 || CFLinear[dense]/line | | | | +Epoch 18 || ar_0 | | | | +Epoch 18 |+----------------------+----------------+----------------+---------------------+ +Epoch 18 | +Epoch 18 |Statistics of the magnitude sparsity algorithm: +Epoch 18 |+----------------------------------------------------------------------+-------+ +Epoch 18 || Statistic's name | Value | +Epoch 18 |+======================================================================+=======+ +Epoch 18 || A target level of the sparsity for the algorithm for the current | 0.782 | +Epoch 18 || epoch | | +Epoch 18 |+----------------------------------------------------------------------+-------+ +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || Layer's name | Sparsity threshold | +Epoch 18 |+=========================================================+====================+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 18 || linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 18 || near_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 18 || linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 18 || inear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 18 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 18 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 18 || linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 18 || near_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 18 || linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 18 || inear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 18 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 18 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 18 || linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 18 || near_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 18 || linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 18 || inear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 18 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 18 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 18 || linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 18 || near_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 18 || linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 18 || inear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 18 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 18 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 18 || linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 18 || near_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 18 || linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 18 || inear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 18 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 18 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 18 || linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 18 || near_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 18 || linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 18 || inear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 18 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 18 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 18 || linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 18 || near_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 18 || linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 18 || inear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 18 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 18 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 18 || linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 18 || near_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 18 || linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 18 || inear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 18 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 18 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 18 || linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 18 || near_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 18 || linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 18 || inear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 18 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 18 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 18 || linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 18 || near_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 18 || linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 18 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 18 || inear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 18 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 18 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 18 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 18 || /linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 18 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 18 || inear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 18 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 18 || /linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 18 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 18 || linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 18 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 18 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 18 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 18 || /linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 18 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 18 || inear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 18 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 18 || /linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 18 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 18 || linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 18 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +Epoch 18 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 18 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 18 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 18 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 19 |+--------------------------------+-------+ +Epoch 19 || Statistic's name | Value | +Epoch 19 |+================================+=======+ +Epoch 19 || Ratio of enabled quantizations | 100 | +Epoch 19 |+--------------------------------+-------+ +Epoch 19 | +Epoch 19 |Statistics of the quantization share: +Epoch 19 |+----------------------------------+--------------------+ +Epoch 19 || Statistic's name | Value | +Epoch 19 |+==================================+====================+ +Epoch 19 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 19 |+----------------------------------+--------------------+ +Epoch 19 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 19 |+----------------------------------+--------------------+ +Epoch 19 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 19 |+----------------------------------+--------------------+ +Epoch 19 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 19 |+----------------------------------+--------------------+ +Epoch 19 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 19 |+----------------------------------+--------------------+ +Epoch 19 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 19 |+----------------------------------+--------------------+ +Epoch 19 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 19 |+----------------------------------+--------------------+ +Epoch 19 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 19 |+----------------------------------+--------------------+ +Epoch 19 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 19 |+----------------------------------+--------------------+ +Epoch 19 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 19 |+----------------------------------+--------------------+ +Epoch 19 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 19 |+----------------------------------+--------------------+ +Epoch 19 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 19 |+----------------------------------+--------------------+ +Epoch 19 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 19 |+----------------------------------+--------------------+ +Epoch 19 | +Epoch 19 |Statistics of the bitwidth distribution: +Epoch 19 |+--------------+---------------------+--------------------+--------------------+ +Epoch 19 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 19 || | WQs | Placed AQs | Qs | +Epoch 19 |+==============+=====================+====================+====================+ +Epoch 19 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 19 || | | | 173) | +Epoch 19 |+--------------+---------------------+--------------------+--------------------+ +Epoch 19 | +Epoch 19 |Statistics of the sparsified model: +Epoch 19 |+-----------------------------------------+-------+ +Epoch 19 || Statistic's name | Value | +Epoch 19 |+=========================================+=======+ +Epoch 19 || Sparsity level of the whole model | 0.612 | +Epoch 19 |+-----------------------------------------+-------+ +Epoch 19 || Sparsity level of all sparsified layers | 0.789 | +Epoch 19 |+-----------------------------------------+-------+ +Epoch 19 | +Epoch 19 |Statistics by sparsified layers: +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 19 |+======================+================+================+=====================+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[0]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[qu | | | | +Epoch 19 || ery]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[0]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[ke | | | | +Epoch 19 || y]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.602 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[0]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[va | | | | +Epoch 19 || lue]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.622 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[0]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfOutput[ou | | | | +Epoch 19 || tput]/NNCFLinear[den | | | | +Epoch 19 || se]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [3072, 768] | 0.883 | 2.778 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[0]/Be | | | | +Epoch 19 || rtIntermediate[inter | | | | +Epoch 19 || mediate]/NNCFLinear[ | | | | +Epoch 19 || dense]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 3072] | 0.889 | 2.778 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[0]/Be | | | | +Epoch 19 || rtOutput[output]/NNC | | | | +Epoch 19 || FLinear[dense]/linea | | | | +Epoch 19 || r_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[1]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[qu | | | | +Epoch 19 || ery]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[1]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[ke | | | | +Epoch 19 || y]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[1]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[va | | | | +Epoch 19 || lue]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.624 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[1]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfOutput[ou | | | | +Epoch 19 || tput]/NNCFLinear[den | | | | +Epoch 19 || se]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [3072, 768] | 0.884 | 2.778 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[1]/Be | | | | +Epoch 19 || rtIntermediate[inter | | | | +Epoch 19 || mediate]/NNCFLinear[ | | | | +Epoch 19 || dense]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 3072] | 0.891 | 2.778 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[1]/Be | | | | +Epoch 19 || rtOutput[output]/NNC | | | | +Epoch 19 || FLinear[dense]/linea | | | | +Epoch 19 || r_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[2]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[qu | | | | +Epoch 19 || ery]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.611 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[2]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[ke | | | | +Epoch 19 || y]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.606 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[2]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[va | | | | +Epoch 19 || lue]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.617 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[2]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfOutput[ou | | | | +Epoch 19 || tput]/NNCFLinear[den | | | | +Epoch 19 || se]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [3072, 768] | 0.884 | 2.778 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[2]/Be | | | | +Epoch 19 || rtIntermediate[inter | | | | +Epoch 19 || mediate]/NNCFLinear[ | | | | +Epoch 19 || dense]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 3072] | 0.890 | 2.778 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[2]/Be | | | | +Epoch 19 || rtOutput[output]/NNC | | | | +Epoch 19 || FLinear[dense]/linea | | | | +Epoch 19 || r_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[3]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[qu | | | | +Epoch 19 || ery]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[3]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[ke | | | | +Epoch 19 || y]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[3]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[va | | | | +Epoch 19 || lue]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[3]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfOutput[ou | | | | +Epoch 19 || tput]/NNCFLinear[den | | | | +Epoch 19 || se]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [3072, 768] | 0.884 | 2.778 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[3]/Be | | | | +Epoch 19 || rtIntermediate[inter | | | | +Epoch 19 || mediate]/NNCFLinear[ | | | | +Epoch 19 || dense]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 3072] | 0.892 | 2.778 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[3]/Be | | | | +Epoch 19 || rtOutput[output]/NNC | | | | +Epoch 19 || FLinear[dense]/linea | | | | +Epoch 19 || r_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.586 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[4]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[qu | | | | +Epoch 19 || ery]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.587 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[4]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[ke | | | | +Epoch 19 || y]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[4]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[va | | | | +Epoch 19 || lue]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.607 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[4]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfOutput[ou | | | | +Epoch 19 || tput]/NNCFLinear[den | | | | +Epoch 19 || se]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [3072, 768] | 0.884 | 2.778 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[4]/Be | | | | +Epoch 19 || rtIntermediate[inter | | | | +Epoch 19 || mediate]/NNCFLinear[ | | | | +Epoch 19 || dense]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 3072] | 0.891 | 2.778 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[4]/Be | | | | +Epoch 19 || rtOutput[output]/NNC | | | | +Epoch 19 || FLinear[dense]/linea | | | | +Epoch 19 || r_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.586 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[5]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[qu | | | | +Epoch 19 || ery]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.587 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[5]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[ke | | | | +Epoch 19 || y]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[5]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[va | | | | +Epoch 19 || lue]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.606 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[5]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfOutput[ou | | | | +Epoch 19 || tput]/NNCFLinear[den | | | | +Epoch 19 || se]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [3072, 768] | 0.884 | 2.778 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[5]/Be | | | | +Epoch 19 || rtIntermediate[inter | | | | +Epoch 19 || mediate]/NNCFLinear[ | | | | +Epoch 19 || dense]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 3072] | 0.891 | 2.778 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[5]/Be | | | | +Epoch 19 || rtOutput[output]/NNC | | | | +Epoch 19 || FLinear[dense]/linea | | | | +Epoch 19 || r_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.584 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[6]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[qu | | | | +Epoch 19 || ery]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.585 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[6]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[ke | | | | +Epoch 19 || y]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[6]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[va | | | | +Epoch 19 || lue]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.606 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[6]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfOutput[ou | | | | +Epoch 19 || tput]/NNCFLinear[den | | | | +Epoch 19 || se]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [3072, 768] | 0.883 | 2.778 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[6]/Be | | | | +Epoch 19 || rtIntermediate[inter | | | | +Epoch 19 || mediate]/NNCFLinear[ | | | | +Epoch 19 || dense]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 3072] | 0.889 | 2.778 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[6]/Be | | | | +Epoch 19 || rtOutput[output]/NNC | | | | +Epoch 19 || FLinear[dense]/linea | | | | +Epoch 19 || r_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.583 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[7]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[qu | | | | +Epoch 19 || ery]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.584 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[7]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[ke | | | | +Epoch 19 || y]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[7]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[va | | | | +Epoch 19 || lue]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[7]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfOutput[ou | | | | +Epoch 19 || tput]/NNCFLinear[den | | | | +Epoch 19 || se]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [3072, 768] | 0.883 | 2.778 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[7]/Be | | | | +Epoch 19 || rtIntermediate[inter | | | | +Epoch 19 || mediate]/NNCFLinear[ | | | | +Epoch 19 || dense]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 3072] | 0.887 | 2.778 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[7]/Be | | | | +Epoch 19 || rtOutput[output]/NNC | | | | +Epoch 19 || FLinear[dense]/linea | | | | +Epoch 19 || r_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.583 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[8]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[qu | | | | +Epoch 19 || ery]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.583 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[8]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[ke | | | | +Epoch 19 || y]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.587 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[8]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[va | | | | +Epoch 19 || lue]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[8]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfOutput[ou | | | | +Epoch 19 || tput]/NNCFLinear[den | | | | +Epoch 19 || se]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [3072, 768] | 0.882 | 2.778 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[8]/Be | | | | +Epoch 19 || rtIntermediate[inter | | | | +Epoch 19 || mediate]/NNCFLinear[ | | | | +Epoch 19 || dense]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 3072] | 0.887 | 2.778 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[8]/Be | | | | +Epoch 19 || rtOutput[output]/NNC | | | | +Epoch 19 || FLinear[dense]/linea | | | | +Epoch 19 || r_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.578 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[9]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[qu | | | | +Epoch 19 || ery]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.581 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[9]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[ke | | | | +Epoch 19 || y]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.585 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[9]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfAttention | | | | +Epoch 19 || [self]/NNCFLinear[va | | | | +Epoch 19 || lue]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.588 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[9]/Be | | | | +Epoch 19 || rtAttention[attentio | | | | +Epoch 19 || n]/BertSelfOutput[ou | | | | +Epoch 19 || tput]/NNCFLinear[den | | | | +Epoch 19 || se]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [3072, 768] | 0.886 | 2.778 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[9]/Be | | | | +Epoch 19 || rtIntermediate[inter | | | | +Epoch 19 || mediate]/NNCFLinear[ | | | | +Epoch 19 || dense]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 3072] | 0.891 | 2.778 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[9]/Be | | | | +Epoch 19 || rtOutput[output]/NNC | | | | +Epoch 19 || FLinear[dense]/linea | | | | +Epoch 19 || r_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.580 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[10]/B | | | | +Epoch 19 || ertAttention[attenti | | | | +Epoch 19 || on]/BertSelfAttentio | | | | +Epoch 19 || n[self]/NNCFLinear[q | | | | +Epoch 19 || uery]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.581 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[10]/B | | | | +Epoch 19 || ertAttention[attenti | | | | +Epoch 19 || on]/BertSelfAttentio | | | | +Epoch 19 || n[self]/NNCFLinear[k | | | | +Epoch 19 || ey]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[10]/B | | | | +Epoch 19 || ertAttention[attenti | | | | +Epoch 19 || on]/BertSelfAttentio | | | | +Epoch 19 || n[self]/NNCFLinear[v | | | | +Epoch 19 || alue]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[10]/B | | | | +Epoch 19 || ertAttention[attenti | | | | +Epoch 19 || on]/BertSelfOutput[o | | | | +Epoch 19 || utput]/NNCFLinear[de | | | | +Epoch 19 || nse]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [3072, 768] | 0.883 | 2.778 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[10]/B | | | | +Epoch 19 || ertIntermediate[inte | | | | +Epoch 19 || rmediate]/NNCFLinear | | | | +Epoch 19 || [dense]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 3072] | 0.888 | 2.778 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[10]/B | | | | +Epoch 19 || ertOutput[output]/NN | | | | +Epoch 19 || CFLinear[dense]/line | | | | +Epoch 19 || ar_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.583 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[11]/B | | | | +Epoch 19 || ertAttention[attenti | | | | +Epoch 19 || on]/BertSelfAttentio | | | | +Epoch 19 || n[self]/NNCFLinear[q | | | | +Epoch 19 || uery]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.579 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[11]/B | | | | +Epoch 19 || ertAttention[attenti | | | | +Epoch 19 || on]/BertSelfAttentio | | | | +Epoch 19 || n[self]/NNCFLinear[k | | | | +Epoch 19 || ey]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.585 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[11]/B | | | | +Epoch 19 || ertAttention[attenti | | | | +Epoch 19 || on]/BertSelfAttentio | | | | +Epoch 19 || n[self]/NNCFLinear[v | | | | +Epoch 19 || alue]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 768] | 0.582 | 0.694 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[11]/B | | | | +Epoch 19 || ertAttention[attenti | | | | +Epoch 19 || on]/BertSelfOutput[o | | | | +Epoch 19 || utput]/NNCFLinear[de | | | | +Epoch 19 || nse]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [3072, 768] | 0.881 | 2.778 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[11]/B | | | | +Epoch 19 || ertIntermediate[inte | | | | +Epoch 19 || rmediate]/NNCFLinear | | | | +Epoch 19 || [dense]/linear_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 || BertForSequenceClass | [768, 3072] | 0.883 | 2.778 | +Epoch 19 || ification/BertModel[ | | | | +Epoch 19 || bert]/BertEncoder[en | | | | +Epoch 19 || coder]/ModuleList[la | | | | +Epoch 19 || yer]/BertLayer[11]/B | | | | +Epoch 19 || ertOutput[output]/NN | | | | +Epoch 19 || CFLinear[dense]/line | | | | +Epoch 19 || ar_0 | | | | +Epoch 19 |+----------------------+----------------+----------------+---------------------+ +Epoch 19 | +Epoch 19 |Statistics of the magnitude sparsity algorithm: +Epoch 19 |+----------------------------------------------------------------------+-------+ +Epoch 19 || Statistic's name | Value | +Epoch 19 |+======================================================================+=======+ +Epoch 19 || A target level of the sparsity for the algorithm for the current | 0.789 | +Epoch 19 || epoch | | +Epoch 19 |+----------------------------------------------------------------------+-------+ +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || Layer's name | Sparsity threshold | +Epoch 19 |+=========================================================+====================+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 19 || linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 19 || near_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 19 || linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 19 || inear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 19 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 19 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 19 || linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 19 || near_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 19 || linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 19 || inear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 19 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 19 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 19 || linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 19 || near_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 19 || linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 19 || inear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 19 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 19 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 19 || linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 19 || near_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 19 || linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 19 || inear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 19 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 19 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 19 || linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 19 || near_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 19 || linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 19 || inear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 19 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 19 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 19 || linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 19 || near_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 19 || linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 19 || inear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 19 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 19 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 19 || linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 19 || near_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 19 || linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 19 || inear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 19 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 19 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 19 || linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 19 || near_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 19 || linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 19 || inear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 19 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 19 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 19 || linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 19 || near_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 19 || linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 19 || inear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 19 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 19 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 19 || linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 19 || near_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 19 || linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 19 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 19 || inear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 19 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 19 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 19 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 19 || /linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 19 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 19 || inear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 19 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 19 || /linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 19 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 19 || linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 19 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 19 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 19 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 19 || /linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 19 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 19 || inear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 19 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 19 || /linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 19 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 19 || linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 19 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +Epoch 19 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 19 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 19 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 19 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 20 |+--------------------------------+-------+ +Epoch 20 || Statistic's name | Value | +Epoch 20 |+================================+=======+ +Epoch 20 || Ratio of enabled quantizations | 100 | +Epoch 20 |+--------------------------------+-------+ +Epoch 20 | +Epoch 20 |Statistics of the quantization share: +Epoch 20 |+----------------------------------+--------------------+ +Epoch 20 || Statistic's name | Value | +Epoch 20 |+==================================+====================+ +Epoch 20 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 20 |+----------------------------------+--------------------+ +Epoch 20 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 20 |+----------------------------------+--------------------+ +Epoch 20 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 20 |+----------------------------------+--------------------+ +Epoch 20 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 20 |+----------------------------------+--------------------+ +Epoch 20 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 20 |+----------------------------------+--------------------+ +Epoch 20 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 20 |+----------------------------------+--------------------+ +Epoch 20 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 20 |+----------------------------------+--------------------+ +Epoch 20 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 20 |+----------------------------------+--------------------+ +Epoch 20 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 20 |+----------------------------------+--------------------+ +Epoch 20 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 20 |+----------------------------------+--------------------+ +Epoch 20 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 20 |+----------------------------------+--------------------+ +Epoch 20 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 20 |+----------------------------------+--------------------+ +Epoch 20 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 20 |+----------------------------------+--------------------+ +Epoch 20 | +Epoch 20 |Statistics of the bitwidth distribution: +Epoch 20 |+--------------+---------------------+--------------------+--------------------+ +Epoch 20 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 20 || | WQs | Placed AQs | Qs | +Epoch 20 |+==============+=====================+====================+====================+ +Epoch 20 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 20 || | | | 173) | +Epoch 20 |+--------------+---------------------+--------------------+--------------------+ +Epoch 20 | +Epoch 20 |Statistics of the sparsified model: +Epoch 20 |+-----------------------------------------+-------+ +Epoch 20 || Statistic's name | Value | +Epoch 20 |+=========================================+=======+ +Epoch 20 || Sparsity level of the whole model | 0.616 | +Epoch 20 |+-----------------------------------------+-------+ +Epoch 20 || Sparsity level of all sparsified layers | 0.794 | +Epoch 20 |+-----------------------------------------+-------+ +Epoch 20 | +Epoch 20 |Statistics by sparsified layers: +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 20 |+======================+================+================+=====================+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[0]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[qu | | | | +Epoch 20 || ery]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.611 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[0]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[ke | | | | +Epoch 20 || y]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.609 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[0]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[va | | | | +Epoch 20 || lue]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.630 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[0]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfOutput[ou | | | | +Epoch 20 || tput]/NNCFLinear[den | | | | +Epoch 20 || se]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [3072, 768] | 0.887 | 2.778 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[0]/Be | | | | +Epoch 20 || rtIntermediate[inter | | | | +Epoch 20 || mediate]/NNCFLinear[ | | | | +Epoch 20 || dense]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 3072] | 0.893 | 2.778 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[0]/Be | | | | +Epoch 20 || rtOutput[output]/NNC | | | | +Epoch 20 || FLinear[dense]/linea | | | | +Epoch 20 || r_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[1]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[qu | | | | +Epoch 20 || ery]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[1]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[ke | | | | +Epoch 20 || y]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[1]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[va | | | | +Epoch 20 || lue]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.631 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[1]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfOutput[ou | | | | +Epoch 20 || tput]/NNCFLinear[den | | | | +Epoch 20 || se]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [3072, 768] | 0.887 | 2.778 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[1]/Be | | | | +Epoch 20 || rtIntermediate[inter | | | | +Epoch 20 || mediate]/NNCFLinear[ | | | | +Epoch 20 || dense]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 3072] | 0.894 | 2.778 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[1]/Be | | | | +Epoch 20 || rtOutput[output]/NNC | | | | +Epoch 20 || FLinear[dense]/linea | | | | +Epoch 20 || r_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.615 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[2]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[qu | | | | +Epoch 20 || ery]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.617 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[2]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[ke | | | | +Epoch 20 || y]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.613 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[2]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[va | | | | +Epoch 20 || lue]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.625 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[2]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfOutput[ou | | | | +Epoch 20 || tput]/NNCFLinear[den | | | | +Epoch 20 || se]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [3072, 768] | 0.888 | 2.778 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[2]/Be | | | | +Epoch 20 || rtIntermediate[inter | | | | +Epoch 20 || mediate]/NNCFLinear[ | | | | +Epoch 20 || dense]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 3072] | 0.893 | 2.778 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[2]/Be | | | | +Epoch 20 || rtOutput[output]/NNC | | | | +Epoch 20 || FLinear[dense]/linea | | | | +Epoch 20 || r_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[3]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[qu | | | | +Epoch 20 || ery]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[3]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[ke | | | | +Epoch 20 || y]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[3]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[va | | | | +Epoch 20 || lue]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.619 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[3]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfOutput[ou | | | | +Epoch 20 || tput]/NNCFLinear[den | | | | +Epoch 20 || se]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [3072, 768] | 0.888 | 2.778 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[3]/Be | | | | +Epoch 20 || rtIntermediate[inter | | | | +Epoch 20 || mediate]/NNCFLinear[ | | | | +Epoch 20 || dense]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 3072] | 0.895 | 2.778 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[3]/Be | | | | +Epoch 20 || rtOutput[output]/NNC | | | | +Epoch 20 || FLinear[dense]/linea | | | | +Epoch 20 || r_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[4]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[qu | | | | +Epoch 20 || ery]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[4]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[ke | | | | +Epoch 20 || y]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.602 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[4]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[va | | | | +Epoch 20 || lue]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[4]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfOutput[ou | | | | +Epoch 20 || tput]/NNCFLinear[den | | | | +Epoch 20 || se]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [3072, 768] | 0.887 | 2.778 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[4]/Be | | | | +Epoch 20 || rtIntermediate[inter | | | | +Epoch 20 || mediate]/NNCFLinear[ | | | | +Epoch 20 || dense]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 3072] | 0.894 | 2.778 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[4]/Be | | | | +Epoch 20 || rtOutput[output]/NNC | | | | +Epoch 20 || FLinear[dense]/linea | | | | +Epoch 20 || r_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[5]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[qu | | | | +Epoch 20 || ery]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[5]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[ke | | | | +Epoch 20 || y]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.606 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[5]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[va | | | | +Epoch 20 || lue]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.613 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[5]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfOutput[ou | | | | +Epoch 20 || tput]/NNCFLinear[den | | | | +Epoch 20 || se]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [3072, 768] | 0.888 | 2.778 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[5]/Be | | | | +Epoch 20 || rtIntermediate[inter | | | | +Epoch 20 || mediate]/NNCFLinear[ | | | | +Epoch 20 || dense]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 3072] | 0.894 | 2.778 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[5]/Be | | | | +Epoch 20 || rtOutput[output]/NNC | | | | +Epoch 20 || FLinear[dense]/linea | | | | +Epoch 20 || r_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[6]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[qu | | | | +Epoch 20 || ery]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[6]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[ke | | | | +Epoch 20 || y]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.606 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[6]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[va | | | | +Epoch 20 || lue]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.613 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[6]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfOutput[ou | | | | +Epoch 20 || tput]/NNCFLinear[den | | | | +Epoch 20 || se]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [3072, 768] | 0.887 | 2.778 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[6]/Be | | | | +Epoch 20 || rtIntermediate[inter | | | | +Epoch 20 || mediate]/NNCFLinear[ | | | | +Epoch 20 || dense]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 3072] | 0.892 | 2.778 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[6]/Be | | | | +Epoch 20 || rtOutput[output]/NNC | | | | +Epoch 20 || FLinear[dense]/linea | | | | +Epoch 20 || r_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[7]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[qu | | | | +Epoch 20 || ery]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[7]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[ke | | | | +Epoch 20 || y]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[7]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[va | | | | +Epoch 20 || lue]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.607 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[7]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfOutput[ou | | | | +Epoch 20 || tput]/NNCFLinear[den | | | | +Epoch 20 || se]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [3072, 768] | 0.887 | 2.778 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[7]/Be | | | | +Epoch 20 || rtIntermediate[inter | | | | +Epoch 20 || mediate]/NNCFLinear[ | | | | +Epoch 20 || dense]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 3072] | 0.891 | 2.778 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[7]/Be | | | | +Epoch 20 || rtOutput[output]/NNC | | | | +Epoch 20 || FLinear[dense]/linea | | | | +Epoch 20 || r_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.589 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[8]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[qu | | | | +Epoch 20 || ery]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[8]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[ke | | | | +Epoch 20 || y]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[8]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[va | | | | +Epoch 20 || lue]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[8]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfOutput[ou | | | | +Epoch 20 || tput]/NNCFLinear[den | | | | +Epoch 20 || se]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [3072, 768] | 0.886 | 2.778 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[8]/Be | | | | +Epoch 20 || rtIntermediate[inter | | | | +Epoch 20 || mediate]/NNCFLinear[ | | | | +Epoch 20 || dense]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 3072] | 0.890 | 2.778 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[8]/Be | | | | +Epoch 20 || rtOutput[output]/NNC | | | | +Epoch 20 || FLinear[dense]/linea | | | | +Epoch 20 || r_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.585 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[9]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[qu | | | | +Epoch 20 || ery]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.588 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[9]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[ke | | | | +Epoch 20 || y]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[9]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfAttention | | | | +Epoch 20 || [self]/NNCFLinear[va | | | | +Epoch 20 || lue]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[9]/Be | | | | +Epoch 20 || rtAttention[attentio | | | | +Epoch 20 || n]/BertSelfOutput[ou | | | | +Epoch 20 || tput]/NNCFLinear[den | | | | +Epoch 20 || se]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [3072, 768] | 0.890 | 2.778 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[9]/Be | | | | +Epoch 20 || rtIntermediate[inter | | | | +Epoch 20 || mediate]/NNCFLinear[ | | | | +Epoch 20 || dense]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 3072] | 0.895 | 2.778 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[9]/Be | | | | +Epoch 20 || rtOutput[output]/NNC | | | | +Epoch 20 || FLinear[dense]/linea | | | | +Epoch 20 || r_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.586 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[10]/B | | | | +Epoch 20 || ertAttention[attenti | | | | +Epoch 20 || on]/BertSelfAttentio | | | | +Epoch 20 || n[self]/NNCFLinear[q | | | | +Epoch 20 || uery]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.587 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[10]/B | | | | +Epoch 20 || ertAttention[attenti | | | | +Epoch 20 || on]/BertSelfAttentio | | | | +Epoch 20 || n[self]/NNCFLinear[k | | | | +Epoch 20 || ey]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.602 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[10]/B | | | | +Epoch 20 || ertAttention[attenti | | | | +Epoch 20 || on]/BertSelfAttentio | | | | +Epoch 20 || n[self]/NNCFLinear[v | | | | +Epoch 20 || alue]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[10]/B | | | | +Epoch 20 || ertAttention[attenti | | | | +Epoch 20 || on]/BertSelfOutput[o | | | | +Epoch 20 || utput]/NNCFLinear[de | | | | +Epoch 20 || nse]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [3072, 768] | 0.887 | 2.778 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[10]/B | | | | +Epoch 20 || ertIntermediate[inte | | | | +Epoch 20 || rmediate]/NNCFLinear | | | | +Epoch 20 || [dense]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 3072] | 0.892 | 2.778 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[10]/B | | | | +Epoch 20 || ertOutput[output]/NN | | | | +Epoch 20 || CFLinear[dense]/line | | | | +Epoch 20 || ar_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[11]/B | | | | +Epoch 20 || ertAttention[attenti | | | | +Epoch 20 || on]/BertSelfAttentio | | | | +Epoch 20 || n[self]/NNCFLinear[q | | | | +Epoch 20 || uery]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.586 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[11]/B | | | | +Epoch 20 || ertAttention[attenti | | | | +Epoch 20 || on]/BertSelfAttentio | | | | +Epoch 20 || n[self]/NNCFLinear[k | | | | +Epoch 20 || ey]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[11]/B | | | | +Epoch 20 || ertAttention[attenti | | | | +Epoch 20 || on]/BertSelfAttentio | | | | +Epoch 20 || n[self]/NNCFLinear[v | | | | +Epoch 20 || alue]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 768] | 0.588 | 0.694 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[11]/B | | | | +Epoch 20 || ertAttention[attenti | | | | +Epoch 20 || on]/BertSelfOutput[o | | | | +Epoch 20 || utput]/NNCFLinear[de | | | | +Epoch 20 || nse]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [3072, 768] | 0.885 | 2.778 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[11]/B | | | | +Epoch 20 || ertIntermediate[inte | | | | +Epoch 20 || rmediate]/NNCFLinear | | | | +Epoch 20 || [dense]/linear_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 || BertForSequenceClass | [768, 3072] | 0.886 | 2.778 | +Epoch 20 || ification/BertModel[ | | | | +Epoch 20 || bert]/BertEncoder[en | | | | +Epoch 20 || coder]/ModuleList[la | | | | +Epoch 20 || yer]/BertLayer[11]/B | | | | +Epoch 20 || ertOutput[output]/NN | | | | +Epoch 20 || CFLinear[dense]/line | | | | +Epoch 20 || ar_0 | | | | +Epoch 20 |+----------------------+----------------+----------------+---------------------+ +Epoch 20 | +Epoch 20 |Statistics of the magnitude sparsity algorithm: +Epoch 20 |+----------------------------------------------------------------------+-------+ +Epoch 20 || Statistic's name | Value | +Epoch 20 |+======================================================================+=======+ +Epoch 20 || A target level of the sparsity for the algorithm for the current | 0.794 | +Epoch 20 || epoch | | +Epoch 20 |+----------------------------------------------------------------------+-------+ +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || Layer's name | Sparsity threshold | +Epoch 20 |+=========================================================+====================+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 20 || linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 20 || near_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 20 || linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 20 || inear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 20 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 20 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 20 || linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 20 || near_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 20 || linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 20 || inear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 20 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 20 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 20 || linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 20 || near_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 20 || linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 20 || inear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 20 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 20 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 20 || linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 20 || near_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 20 || linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 20 || inear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 20 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 20 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 20 || linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 20 || near_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 20 || linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 20 || inear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 20 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 20 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 20 || linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 20 || near_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 20 || linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 20 || inear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 20 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 20 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 20 || linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 20 || near_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 20 || linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 20 || inear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 20 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 20 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 20 || linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 20 || near_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 20 || linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 20 || inear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 20 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 20 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 20 || linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 20 || near_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 20 || linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 20 || inear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 20 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 20 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 20 || linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 20 || near_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 20 || linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 20 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 20 || inear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 20 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 20 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 20 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 20 || /linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 20 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 20 || inear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 20 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 20 || /linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 20 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 20 || linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 20 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 20 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 20 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 20 || /linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 20 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 20 || inear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 20 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 20 || /linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 20 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 20 || linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 20 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +Epoch 20 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 20 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 20 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 20 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 21 |+--------------------------------+-------+ +Epoch 21 || Statistic's name | Value | +Epoch 21 |+================================+=======+ +Epoch 21 || Ratio of enabled quantizations | 100 | +Epoch 21 |+--------------------------------+-------+ +Epoch 21 | +Epoch 21 |Statistics of the quantization share: +Epoch 21 |+----------------------------------+--------------------+ +Epoch 21 || Statistic's name | Value | +Epoch 21 |+==================================+====================+ +Epoch 21 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 21 |+----------------------------------+--------------------+ +Epoch 21 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 21 |+----------------------------------+--------------------+ +Epoch 21 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 21 |+----------------------------------+--------------------+ +Epoch 21 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 21 |+----------------------------------+--------------------+ +Epoch 21 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 21 |+----------------------------------+--------------------+ +Epoch 21 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 21 |+----------------------------------+--------------------+ +Epoch 21 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 21 |+----------------------------------+--------------------+ +Epoch 21 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 21 |+----------------------------------+--------------------+ +Epoch 21 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 21 |+----------------------------------+--------------------+ +Epoch 21 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 21 |+----------------------------------+--------------------+ +Epoch 21 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 21 |+----------------------------------+--------------------+ +Epoch 21 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 21 |+----------------------------------+--------------------+ +Epoch 21 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 21 |+----------------------------------+--------------------+ +Epoch 21 | +Epoch 21 |Statistics of the bitwidth distribution: +Epoch 21 |+--------------+---------------------+--------------------+--------------------+ +Epoch 21 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 21 || | WQs | Placed AQs | Qs | +Epoch 21 |+==============+=====================+====================+====================+ +Epoch 21 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 21 || | | | 173) | +Epoch 21 |+--------------+---------------------+--------------------+--------------------+ +Epoch 21 | +Epoch 21 |Statistics of the sparsified model: +Epoch 21 |+-----------------------------------------+-------+ +Epoch 21 || Statistic's name | Value | +Epoch 21 |+=========================================+=======+ +Epoch 21 || Sparsity level of the whole model | 0.618 | +Epoch 21 |+-----------------------------------------+-------+ +Epoch 21 || Sparsity level of all sparsified layers | 0.797 | +Epoch 21 |+-----------------------------------------+-------+ +Epoch 21 | +Epoch 21 |Statistics by sparsified layers: +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 21 |+======================+================+================+=====================+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.606 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[0]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[qu | | | | +Epoch 21 || ery]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[0]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[ke | | | | +Epoch 21 || y]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[0]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[va | | | | +Epoch 21 || lue]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.632 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[0]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfOutput[ou | | | | +Epoch 21 || tput]/NNCFLinear[den | | | | +Epoch 21 || se]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [3072, 768] | 0.890 | 2.778 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[0]/Be | | | | +Epoch 21 || rtIntermediate[inter | | | | +Epoch 21 || mediate]/NNCFLinear[ | | | | +Epoch 21 || dense]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 3072] | 0.896 | 2.778 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[0]/Be | | | | +Epoch 21 || rtOutput[output]/NNC | | | | +Epoch 21 || FLinear[dense]/linea | | | | +Epoch 21 || r_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.602 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[1]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[qu | | | | +Epoch 21 || ery]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[1]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[ke | | | | +Epoch 21 || y]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.613 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[1]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[va | | | | +Epoch 21 || lue]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.634 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[1]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfOutput[ou | | | | +Epoch 21 || tput]/NNCFLinear[den | | | | +Epoch 21 || se]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [3072, 768] | 0.890 | 2.778 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[1]/Be | | | | +Epoch 21 || rtIntermediate[inter | | | | +Epoch 21 || mediate]/NNCFLinear[ | | | | +Epoch 21 || dense]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[1]/Be | | | | +Epoch 21 || rtOutput[output]/NNC | | | | +Epoch 21 || FLinear[dense]/linea | | | | +Epoch 21 || r_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.619 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[2]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[qu | | | | +Epoch 21 || ery]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[2]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[ke | | | | +Epoch 21 || y]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.616 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[2]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[va | | | | +Epoch 21 || lue]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.627 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[2]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfOutput[ou | | | | +Epoch 21 || tput]/NNCFLinear[den | | | | +Epoch 21 || se]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[2]/Be | | | | +Epoch 21 || rtIntermediate[inter | | | | +Epoch 21 || mediate]/NNCFLinear[ | | | | +Epoch 21 || dense]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 3072] | 0.896 | 2.778 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[2]/Be | | | | +Epoch 21 || rtOutput[output]/NNC | | | | +Epoch 21 || FLinear[dense]/linea | | | | +Epoch 21 || r_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[3]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[qu | | | | +Epoch 21 || ery]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.602 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[3]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[ke | | | | +Epoch 21 || y]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.613 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[3]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[va | | | | +Epoch 21 || lue]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.622 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[3]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfOutput[ou | | | | +Epoch 21 || tput]/NNCFLinear[den | | | | +Epoch 21 || se]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[3]/Be | | | | +Epoch 21 || rtIntermediate[inter | | | | +Epoch 21 || mediate]/NNCFLinear[ | | | | +Epoch 21 || dense]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[3]/Be | | | | +Epoch 21 || rtOutput[output]/NNC | | | | +Epoch 21 || FLinear[dense]/linea | | | | +Epoch 21 || r_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[4]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[qu | | | | +Epoch 21 || ery]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[4]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[ke | | | | +Epoch 21 || y]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[4]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[va | | | | +Epoch 21 || lue]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.617 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[4]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfOutput[ou | | | | +Epoch 21 || tput]/NNCFLinear[den | | | | +Epoch 21 || se]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[4]/Be | | | | +Epoch 21 || rtIntermediate[inter | | | | +Epoch 21 || mediate]/NNCFLinear[ | | | | +Epoch 21 || dense]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[4]/Be | | | | +Epoch 21 || rtOutput[output]/NNC | | | | +Epoch 21 || FLinear[dense]/linea | | | | +Epoch 21 || r_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[5]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[qu | | | | +Epoch 21 || ery]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[5]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[ke | | | | +Epoch 21 || y]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.609 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[5]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[va | | | | +Epoch 21 || lue]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.616 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[5]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfOutput[ou | | | | +Epoch 21 || tput]/NNCFLinear[den | | | | +Epoch 21 || se]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[5]/Be | | | | +Epoch 21 || rtIntermediate[inter | | | | +Epoch 21 || mediate]/NNCFLinear[ | | | | +Epoch 21 || dense]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[5]/Be | | | | +Epoch 21 || rtOutput[output]/NNC | | | | +Epoch 21 || FLinear[dense]/linea | | | | +Epoch 21 || r_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[6]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[qu | | | | +Epoch 21 || ery]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[6]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[ke | | | | +Epoch 21 || y]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.609 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[6]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[va | | | | +Epoch 21 || lue]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.616 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[6]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfOutput[ou | | | | +Epoch 21 || tput]/NNCFLinear[den | | | | +Epoch 21 || se]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [3072, 768] | 0.890 | 2.778 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[6]/Be | | | | +Epoch 21 || rtIntermediate[inter | | | | +Epoch 21 || mediate]/NNCFLinear[ | | | | +Epoch 21 || dense]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 3072] | 0.895 | 2.778 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[6]/Be | | | | +Epoch 21 || rtOutput[output]/NNC | | | | +Epoch 21 || FLinear[dense]/linea | | | | +Epoch 21 || r_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[7]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[qu | | | | +Epoch 21 || ery]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[7]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[ke | | | | +Epoch 21 || y]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[7]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[va | | | | +Epoch 21 || lue]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[7]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfOutput[ou | | | | +Epoch 21 || tput]/NNCFLinear[den | | | | +Epoch 21 || se]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [3072, 768] | 0.890 | 2.778 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[7]/Be | | | | +Epoch 21 || rtIntermediate[inter | | | | +Epoch 21 || mediate]/NNCFLinear[ | | | | +Epoch 21 || dense]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 3072] | 0.894 | 2.778 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[7]/Be | | | | +Epoch 21 || rtOutput[output]/NNC | | | | +Epoch 21 || FLinear[dense]/linea | | | | +Epoch 21 || r_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[8]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[qu | | | | +Epoch 21 || ery]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[8]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[ke | | | | +Epoch 21 || y]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[8]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[va | | | | +Epoch 21 || lue]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[8]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfOutput[ou | | | | +Epoch 21 || tput]/NNCFLinear[den | | | | +Epoch 21 || se]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [3072, 768] | 0.890 | 2.778 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[8]/Be | | | | +Epoch 21 || rtIntermediate[inter | | | | +Epoch 21 || mediate]/NNCFLinear[ | | | | +Epoch 21 || dense]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 3072] | 0.894 | 2.778 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[8]/Be | | | | +Epoch 21 || rtOutput[output]/NNC | | | | +Epoch 21 || FLinear[dense]/linea | | | | +Epoch 21 || r_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.588 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[9]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[qu | | | | +Epoch 21 || ery]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[9]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[ke | | | | +Epoch 21 || y]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[9]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfAttention | | | | +Epoch 21 || [self]/NNCFLinear[va | | | | +Epoch 21 || lue]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.598 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[9]/Be | | | | +Epoch 21 || rtAttention[attentio | | | | +Epoch 21 || n]/BertSelfOutput[ou | | | | +Epoch 21 || tput]/NNCFLinear[den | | | | +Epoch 21 || se]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [3072, 768] | 0.893 | 2.778 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[9]/Be | | | | +Epoch 21 || rtIntermediate[inter | | | | +Epoch 21 || mediate]/NNCFLinear[ | | | | +Epoch 21 || dense]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[9]/Be | | | | +Epoch 21 || rtOutput[output]/NNC | | | | +Epoch 21 || FLinear[dense]/linea | | | | +Epoch 21 || r_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[10]/B | | | | +Epoch 21 || ertAttention[attenti | | | | +Epoch 21 || on]/BertSelfAttentio | | | | +Epoch 21 || n[self]/NNCFLinear[q | | | | +Epoch 21 || uery]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[10]/B | | | | +Epoch 21 || ertAttention[attenti | | | | +Epoch 21 || on]/BertSelfAttentio | | | | +Epoch 21 || n[self]/NNCFLinear[k | | | | +Epoch 21 || ey]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[10]/B | | | | +Epoch 21 || ertAttention[attenti | | | | +Epoch 21 || on]/BertSelfAttentio | | | | +Epoch 21 || n[self]/NNCFLinear[v | | | | +Epoch 21 || alue]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[10]/B | | | | +Epoch 21 || ertAttention[attenti | | | | +Epoch 21 || on]/BertSelfOutput[o | | | | +Epoch 21 || utput]/NNCFLinear[de | | | | +Epoch 21 || nse]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[10]/B | | | | +Epoch 21 || ertIntermediate[inte | | | | +Epoch 21 || rmediate]/NNCFLinear | | | | +Epoch 21 || [dense]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 3072] | 0.895 | 2.778 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[10]/B | | | | +Epoch 21 || ertOutput[output]/NN | | | | +Epoch 21 || CFLinear[dense]/line | | | | +Epoch 21 || ar_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[11]/B | | | | +Epoch 21 || ertAttention[attenti | | | | +Epoch 21 || on]/BertSelfAttentio | | | | +Epoch 21 || n[self]/NNCFLinear[q | | | | +Epoch 21 || uery]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.589 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[11]/B | | | | +Epoch 21 || ertAttention[attenti | | | | +Epoch 21 || on]/BertSelfAttentio | | | | +Epoch 21 || n[self]/NNCFLinear[k | | | | +Epoch 21 || ey]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[11]/B | | | | +Epoch 21 || ertAttention[attenti | | | | +Epoch 21 || on]/BertSelfAttentio | | | | +Epoch 21 || n[self]/NNCFLinear[v | | | | +Epoch 21 || alue]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[11]/B | | | | +Epoch 21 || ertAttention[attenti | | | | +Epoch 21 || on]/BertSelfOutput[o | | | | +Epoch 21 || utput]/NNCFLinear[de | | | | +Epoch 21 || nse]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [3072, 768] | 0.889 | 2.778 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[11]/B | | | | +Epoch 21 || ertIntermediate[inte | | | | +Epoch 21 || rmediate]/NNCFLinear | | | | +Epoch 21 || [dense]/linear_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 || BertForSequenceClass | [768, 3072] | 0.891 | 2.778 | +Epoch 21 || ification/BertModel[ | | | | +Epoch 21 || bert]/BertEncoder[en | | | | +Epoch 21 || coder]/ModuleList[la | | | | +Epoch 21 || yer]/BertLayer[11]/B | | | | +Epoch 21 || ertOutput[output]/NN | | | | +Epoch 21 || CFLinear[dense]/line | | | | +Epoch 21 || ar_0 | | | | +Epoch 21 |+----------------------+----------------+----------------+---------------------+ +Epoch 21 | +Epoch 21 |Statistics of the magnitude sparsity algorithm: +Epoch 21 |+----------------------------------------------------------------------+-------+ +Epoch 21 || Statistic's name | Value | +Epoch 21 |+======================================================================+=======+ +Epoch 21 || A target level of the sparsity for the algorithm for the current | 0.797 | +Epoch 21 || epoch | | +Epoch 21 |+----------------------------------------------------------------------+-------+ +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || Layer's name | Sparsity threshold | +Epoch 21 |+=========================================================+====================+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 21 || linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 21 || near_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 21 || linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 21 || inear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 21 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 21 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 21 || linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 21 || near_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 21 || linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 21 || inear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 21 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 21 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 21 || linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 21 || near_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 21 || linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 21 || inear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 21 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 21 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 21 || linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 21 || near_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 21 || linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 21 || inear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 21 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 21 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 21 || linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 21 || near_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 21 || linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 21 || inear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 21 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 21 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 21 || linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 21 || near_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 21 || linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 21 || inear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 21 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 21 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 21 || linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 21 || near_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 21 || linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 21 || inear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 21 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 21 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 21 || linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 21 || near_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 21 || linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 21 || inear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 21 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 21 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 21 || linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 21 || near_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 21 || linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 21 || inear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 21 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 21 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 21 || linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 21 || near_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 21 || linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 21 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 21 || inear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 21 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 21 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 21 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 21 || /linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 21 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 21 || inear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 21 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 21 || /linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 21 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 21 || linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 21 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 21 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 21 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 21 || /linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 21 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 21 || inear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 21 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 21 || /linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 21 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 21 || linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 21 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +Epoch 21 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 21 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 21 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 21 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 22 |+--------------------------------+-------+ +Epoch 22 || Statistic's name | Value | +Epoch 22 |+================================+=======+ +Epoch 22 || Ratio of enabled quantizations | 100 | +Epoch 22 |+--------------------------------+-------+ +Epoch 22 | +Epoch 22 |Statistics of the quantization share: +Epoch 22 |+----------------------------------+--------------------+ +Epoch 22 || Statistic's name | Value | +Epoch 22 |+==================================+====================+ +Epoch 22 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 22 |+----------------------------------+--------------------+ +Epoch 22 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 22 |+----------------------------------+--------------------+ +Epoch 22 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 22 |+----------------------------------+--------------------+ +Epoch 22 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 22 |+----------------------------------+--------------------+ +Epoch 22 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 22 |+----------------------------------+--------------------+ +Epoch 22 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 22 |+----------------------------------+--------------------+ +Epoch 22 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 22 |+----------------------------------+--------------------+ +Epoch 22 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 22 |+----------------------------------+--------------------+ +Epoch 22 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 22 |+----------------------------------+--------------------+ +Epoch 22 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 22 |+----------------------------------+--------------------+ +Epoch 22 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 22 |+----------------------------------+--------------------+ +Epoch 22 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 22 |+----------------------------------+--------------------+ +Epoch 22 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 22 |+----------------------------------+--------------------+ +Epoch 22 | +Epoch 22 |Statistics of the bitwidth distribution: +Epoch 22 |+--------------+---------------------+--------------------+--------------------+ +Epoch 22 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 22 || | WQs | Placed AQs | Qs | +Epoch 22 |+==============+=====================+====================+====================+ +Epoch 22 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 22 || | | | 173) | +Epoch 22 |+--------------+---------------------+--------------------+--------------------+ +Epoch 22 | +Epoch 22 |Statistics of the sparsified model: +Epoch 22 |+-----------------------------------------+-------+ +Epoch 22 || Statistic's name | Value | +Epoch 22 |+=========================================+=======+ +Epoch 22 || Sparsity level of the whole model | 0.620 | +Epoch 22 |+-----------------------------------------+-------+ +Epoch 22 || Sparsity level of all sparsified layers | 0.799 | +Epoch 22 |+-----------------------------------------+-------+ +Epoch 22 | +Epoch 22 |Statistics by sparsified layers: +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 22 |+======================+================+================+=====================+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.607 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[0]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[qu | | | | +Epoch 22 || ery]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.615 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[0]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[ke | | | | +Epoch 22 || y]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[0]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[va | | | | +Epoch 22 || lue]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.633 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[0]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfOutput[ou | | | | +Epoch 22 || tput]/NNCFLinear[den | | | | +Epoch 22 || se]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [3072, 768] | 0.892 | 2.778 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[0]/Be | | | | +Epoch 22 || rtIntermediate[inter | | | | +Epoch 22 || mediate]/NNCFLinear[ | | | | +Epoch 22 || dense]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[0]/Be | | | | +Epoch 22 || rtOutput[output]/NNC | | | | +Epoch 22 || FLinear[dense]/linea | | | | +Epoch 22 || r_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[1]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[qu | | | | +Epoch 22 || ery]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.607 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[1]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[ke | | | | +Epoch 22 || y]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.613 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[1]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[va | | | | +Epoch 22 || lue]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.634 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[1]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfOutput[ou | | | | +Epoch 22 || tput]/NNCFLinear[den | | | | +Epoch 22 || se]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [3072, 768] | 0.892 | 2.778 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[1]/Be | | | | +Epoch 22 || rtIntermediate[inter | | | | +Epoch 22 || mediate]/NNCFLinear[ | | | | +Epoch 22 || dense]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 3072] | 0.899 | 2.778 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[1]/Be | | | | +Epoch 22 || rtOutput[output]/NNC | | | | +Epoch 22 || FLinear[dense]/linea | | | | +Epoch 22 || r_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[2]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[qu | | | | +Epoch 22 || ery]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.621 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[2]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[ke | | | | +Epoch 22 || y]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.616 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[2]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[va | | | | +Epoch 22 || lue]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.628 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[2]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfOutput[ou | | | | +Epoch 22 || tput]/NNCFLinear[den | | | | +Epoch 22 || se]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [3072, 768] | 0.893 | 2.778 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[2]/Be | | | | +Epoch 22 || rtIntermediate[inter | | | | +Epoch 22 || mediate]/NNCFLinear[ | | | | +Epoch 22 || dense]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[2]/Be | | | | +Epoch 22 || rtOutput[output]/NNC | | | | +Epoch 22 || FLinear[dense]/linea | | | | +Epoch 22 || r_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[3]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[qu | | | | +Epoch 22 || ery]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[3]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[ke | | | | +Epoch 22 || y]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[3]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[va | | | | +Epoch 22 || lue]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.623 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[3]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfOutput[ou | | | | +Epoch 22 || tput]/NNCFLinear[den | | | | +Epoch 22 || se]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [3072, 768] | 0.893 | 2.778 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[3]/Be | | | | +Epoch 22 || rtIntermediate[inter | | | | +Epoch 22 || mediate]/NNCFLinear[ | | | | +Epoch 22 || dense]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 3072] | 0.901 | 2.778 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[3]/Be | | | | +Epoch 22 || rtOutput[output]/NNC | | | | +Epoch 22 || FLinear[dense]/linea | | | | +Epoch 22 || r_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.598 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[4]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[qu | | | | +Epoch 22 || ery]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.598 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[4]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[ke | | | | +Epoch 22 || y]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[4]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[va | | | | +Epoch 22 || lue]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.618 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[4]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfOutput[ou | | | | +Epoch 22 || tput]/NNCFLinear[den | | | | +Epoch 22 || se]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [3072, 768] | 0.893 | 2.778 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[4]/Be | | | | +Epoch 22 || rtIntermediate[inter | | | | +Epoch 22 || mediate]/NNCFLinear[ | | | | +Epoch 22 || dense]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 3072] | 0.900 | 2.778 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[4]/Be | | | | +Epoch 22 || rtOutput[output]/NNC | | | | +Epoch 22 || FLinear[dense]/linea | | | | +Epoch 22 || r_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[5]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[qu | | | | +Epoch 22 || ery]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.598 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[5]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[ke | | | | +Epoch 22 || y]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[5]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[va | | | | +Epoch 22 || lue]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.617 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[5]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfOutput[ou | | | | +Epoch 22 || tput]/NNCFLinear[den | | | | +Epoch 22 || se]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [3072, 768] | 0.893 | 2.778 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[5]/Be | | | | +Epoch 22 || rtIntermediate[inter | | | | +Epoch 22 || mediate]/NNCFLinear[ | | | | +Epoch 22 || dense]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 3072] | 0.899 | 2.778 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[5]/Be | | | | +Epoch 22 || rtOutput[output]/NNC | | | | +Epoch 22 || FLinear[dense]/linea | | | | +Epoch 22 || r_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[6]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[qu | | | | +Epoch 22 || ery]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[6]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[ke | | | | +Epoch 22 || y]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[6]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[va | | | | +Epoch 22 || lue]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.617 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[6]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfOutput[ou | | | | +Epoch 22 || tput]/NNCFLinear[den | | | | +Epoch 22 || se]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [3072, 768] | 0.893 | 2.778 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[6]/Be | | | | +Epoch 22 || rtIntermediate[inter | | | | +Epoch 22 || mediate]/NNCFLinear[ | | | | +Epoch 22 || dense]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[6]/Be | | | | +Epoch 22 || rtOutput[output]/NNC | | | | +Epoch 22 || FLinear[dense]/linea | | | | +Epoch 22 || r_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[7]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[qu | | | | +Epoch 22 || ery]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[7]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[ke | | | | +Epoch 22 || y]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[7]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[va | | | | +Epoch 22 || lue]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.611 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[7]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfOutput[ou | | | | +Epoch 22 || tput]/NNCFLinear[den | | | | +Epoch 22 || se]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [3072, 768] | 0.892 | 2.778 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[7]/Be | | | | +Epoch 22 || rtIntermediate[inter | | | | +Epoch 22 || mediate]/NNCFLinear[ | | | | +Epoch 22 || dense]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 3072] | 0.896 | 2.778 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[7]/Be | | | | +Epoch 22 || rtOutput[output]/NNC | | | | +Epoch 22 || FLinear[dense]/linea | | | | +Epoch 22 || r_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[8]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[qu | | | | +Epoch 22 || ery]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[8]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[ke | | | | +Epoch 22 || y]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.598 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[8]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[va | | | | +Epoch 22 || lue]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[8]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfOutput[ou | | | | +Epoch 22 || tput]/NNCFLinear[den | | | | +Epoch 22 || se]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [3072, 768] | 0.892 | 2.778 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[8]/Be | | | | +Epoch 22 || rtIntermediate[inter | | | | +Epoch 22 || mediate]/NNCFLinear[ | | | | +Epoch 22 || dense]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 3072] | 0.896 | 2.778 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[8]/Be | | | | +Epoch 22 || rtOutput[output]/NNC | | | | +Epoch 22 || FLinear[dense]/linea | | | | +Epoch 22 || r_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.590 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[9]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[qu | | | | +Epoch 22 || ery]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[9]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[ke | | | | +Epoch 22 || y]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[9]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfAttention | | | | +Epoch 22 || [self]/NNCFLinear[va | | | | +Epoch 22 || lue]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[9]/Be | | | | +Epoch 22 || rtAttention[attentio | | | | +Epoch 22 || n]/BertSelfOutput[ou | | | | +Epoch 22 || tput]/NNCFLinear[den | | | | +Epoch 22 || se]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [3072, 768] | 0.896 | 2.778 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[9]/Be | | | | +Epoch 22 || rtIntermediate[inter | | | | +Epoch 22 || mediate]/NNCFLinear[ | | | | +Epoch 22 || dense]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 3072] | 0.900 | 2.778 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[9]/Be | | | | +Epoch 22 || rtOutput[output]/NNC | | | | +Epoch 22 || FLinear[dense]/linea | | | | +Epoch 22 || r_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[10]/B | | | | +Epoch 22 || ertAttention[attenti | | | | +Epoch 22 || on]/BertSelfAttentio | | | | +Epoch 22 || n[self]/NNCFLinear[q | | | | +Epoch 22 || uery]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.592 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[10]/B | | | | +Epoch 22 || ertAttention[attenti | | | | +Epoch 22 || on]/BertSelfAttentio | | | | +Epoch 22 || n[self]/NNCFLinear[k | | | | +Epoch 22 || ey]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.606 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[10]/B | | | | +Epoch 22 || ertAttention[attenti | | | | +Epoch 22 || on]/BertSelfAttentio | | | | +Epoch 22 || n[self]/NNCFLinear[v | | | | +Epoch 22 || alue]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.602 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[10]/B | | | | +Epoch 22 || ertAttention[attenti | | | | +Epoch 22 || on]/BertSelfOutput[o | | | | +Epoch 22 || utput]/NNCFLinear[de | | | | +Epoch 22 || nse]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [3072, 768] | 0.893 | 2.778 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[10]/B | | | | +Epoch 22 || ertIntermediate[inte | | | | +Epoch 22 || rmediate]/NNCFLinear | | | | +Epoch 22 || [dense]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[10]/B | | | | +Epoch 22 || ertOutput[output]/NN | | | | +Epoch 22 || CFLinear[dense]/line | | | | +Epoch 22 || ar_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[11]/B | | | | +Epoch 22 || ertAttention[attenti | | | | +Epoch 22 || on]/BertSelfAttentio | | | | +Epoch 22 || n[self]/NNCFLinear[q | | | | +Epoch 22 || uery]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.591 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[11]/B | | | | +Epoch 22 || ertAttention[attenti | | | | +Epoch 22 || on]/BertSelfAttentio | | | | +Epoch 22 || n[self]/NNCFLinear[k | | | | +Epoch 22 || ey]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[11]/B | | | | +Epoch 22 || ertAttention[attenti | | | | +Epoch 22 || on]/BertSelfAttentio | | | | +Epoch 22 || n[self]/NNCFLinear[v | | | | +Epoch 22 || alue]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 768] | 0.593 | 0.694 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[11]/B | | | | +Epoch 22 || ertAttention[attenti | | | | +Epoch 22 || on]/BertSelfOutput[o | | | | +Epoch 22 || utput]/NNCFLinear[de | | | | +Epoch 22 || nse]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [3072, 768] | 0.892 | 2.778 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[11]/B | | | | +Epoch 22 || ertIntermediate[inte | | | | +Epoch 22 || rmediate]/NNCFLinear | | | | +Epoch 22 || [dense]/linear_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 || BertForSequenceClass | [768, 3072] | 0.894 | 2.778 | +Epoch 22 || ification/BertModel[ | | | | +Epoch 22 || bert]/BertEncoder[en | | | | +Epoch 22 || coder]/ModuleList[la | | | | +Epoch 22 || yer]/BertLayer[11]/B | | | | +Epoch 22 || ertOutput[output]/NN | | | | +Epoch 22 || CFLinear[dense]/line | | | | +Epoch 22 || ar_0 | | | | +Epoch 22 |+----------------------+----------------+----------------+---------------------+ +Epoch 22 | +Epoch 22 |Statistics of the magnitude sparsity algorithm: +Epoch 22 |+----------------------------------------------------------------------+-------+ +Epoch 22 || Statistic's name | Value | +Epoch 22 |+======================================================================+=======+ +Epoch 22 || A target level of the sparsity for the algorithm for the current | 0.799 | +Epoch 22 || epoch | | +Epoch 22 |+----------------------------------------------------------------------+-------+ +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || Layer's name | Sparsity threshold | +Epoch 22 |+=========================================================+====================+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 22 || linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 22 || near_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 22 || linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 22 || inear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 22 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 22 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 22 || linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 22 || near_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 22 || linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 22 || inear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 22 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 22 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 22 || linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 22 || near_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 22 || linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 22 || inear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 22 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 22 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 22 || linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 22 || near_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 22 || linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 22 || inear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 22 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 22 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 22 || linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 22 || near_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 22 || linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 22 || inear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 22 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 22 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 22 || linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 22 || near_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 22 || linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 22 || inear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 22 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 22 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 22 || linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 22 || near_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 22 || linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 22 || inear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 22 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 22 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 22 || linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 22 || near_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 22 || linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 22 || inear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 22 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 22 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 22 || linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 22 || near_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 22 || linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 22 || inear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 22 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 22 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 22 || linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 22 || near_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 22 || linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 22 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 22 || inear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 22 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 22 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 22 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 22 || /linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 22 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 22 || inear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 22 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 22 || /linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 22 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 22 || linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 22 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 22 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 22 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 22 || /linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 22 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 22 || inear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 22 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 22 || /linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 22 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 22 || linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 22 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +Epoch 22 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 22 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 22 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 22 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 23 |+--------------------------------+-------+ +Epoch 23 || Statistic's name | Value | +Epoch 23 |+================================+=======+ +Epoch 23 || Ratio of enabled quantizations | 100 | +Epoch 23 |+--------------------------------+-------+ +Epoch 23 | +Epoch 23 |Statistics of the quantization share: +Epoch 23 |+----------------------------------+--------------------+ +Epoch 23 || Statistic's name | Value | +Epoch 23 |+==================================+====================+ +Epoch 23 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 23 |+----------------------------------+--------------------+ +Epoch 23 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 23 |+----------------------------------+--------------------+ +Epoch 23 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 23 |+----------------------------------+--------------------+ +Epoch 23 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 23 |+----------------------------------+--------------------+ +Epoch 23 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 23 |+----------------------------------+--------------------+ +Epoch 23 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 23 |+----------------------------------+--------------------+ +Epoch 23 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 23 |+----------------------------------+--------------------+ +Epoch 23 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 23 |+----------------------------------+--------------------+ +Epoch 23 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 23 |+----------------------------------+--------------------+ +Epoch 23 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 23 |+----------------------------------+--------------------+ +Epoch 23 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 23 |+----------------------------------+--------------------+ +Epoch 23 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 23 |+----------------------------------+--------------------+ +Epoch 23 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 23 |+----------------------------------+--------------------+ +Epoch 23 | +Epoch 23 |Statistics of the bitwidth distribution: +Epoch 23 |+--------------+---------------------+--------------------+--------------------+ +Epoch 23 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 23 || | WQs | Placed AQs | Qs | +Epoch 23 |+==============+=====================+====================+====================+ +Epoch 23 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 23 || | | | 173) | +Epoch 23 |+--------------+---------------------+--------------------+--------------------+ +Epoch 23 | +Epoch 23 |Statistics of the sparsified model: +Epoch 23 |+-----------------------------------------+-------+ +Epoch 23 || Statistic's name | Value | +Epoch 23 |+=========================================+=======+ +Epoch 23 || Sparsity level of the whole model | 0.620 | +Epoch 23 |+-----------------------------------------+-------+ +Epoch 23 || Sparsity level of all sparsified layers | 0.800 | +Epoch 23 |+-----------------------------------------+-------+ +Epoch 23 | +Epoch 23 |Statistics by sparsified layers: +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 23 |+======================+================+================+=====================+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[0]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[qu | | | | +Epoch 23 || ery]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[0]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[ke | | | | +Epoch 23 || y]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.621 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[0]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[va | | | | +Epoch 23 || lue]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.642 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[0]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfOutput[ou | | | | +Epoch 23 || tput]/NNCFLinear[den | | | | +Epoch 23 || se]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[0]/Be | | | | +Epoch 23 || rtIntermediate[inter | | | | +Epoch 23 || mediate]/NNCFLinear[ | | | | +Epoch 23 || dense]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[0]/Be | | | | +Epoch 23 || rtOutput[output]/NNC | | | | +Epoch 23 || FLinear[dense]/linea | | | | +Epoch 23 || r_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.609 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[1]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[qu | | | | +Epoch 23 || ery]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[1]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[ke | | | | +Epoch 23 || y]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.622 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[1]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[va | | | | +Epoch 23 || lue]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.643 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[1]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfOutput[ou | | | | +Epoch 23 || tput]/NNCFLinear[den | | | | +Epoch 23 || se]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[1]/Be | | | | +Epoch 23 || rtIntermediate[inter | | | | +Epoch 23 || mediate]/NNCFLinear[ | | | | +Epoch 23 || dense]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[1]/Be | | | | +Epoch 23 || rtOutput[output]/NNC | | | | +Epoch 23 || FLinear[dense]/linea | | | | +Epoch 23 || r_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.624 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[2]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[qu | | | | +Epoch 23 || ery]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.625 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[2]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[ke | | | | +Epoch 23 || y]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.625 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[2]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[va | | | | +Epoch 23 || lue]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.637 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[2]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfOutput[ou | | | | +Epoch 23 || tput]/NNCFLinear[den | | | | +Epoch 23 || se]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [3072, 768] | 0.892 | 2.778 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[2]/Be | | | | +Epoch 23 || rtIntermediate[inter | | | | +Epoch 23 || mediate]/NNCFLinear[ | | | | +Epoch 23 || dense]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[2]/Be | | | | +Epoch 23 || rtOutput[output]/NNC | | | | +Epoch 23 || FLinear[dense]/linea | | | | +Epoch 23 || r_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.606 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[3]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[qu | | | | +Epoch 23 || ery]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.608 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[3]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[ke | | | | +Epoch 23 || y]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.621 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[3]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[va | | | | +Epoch 23 || lue]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.631 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[3]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfOutput[ou | | | | +Epoch 23 || tput]/NNCFLinear[den | | | | +Epoch 23 || se]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [3072, 768] | 0.892 | 2.778 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[3]/Be | | | | +Epoch 23 || rtIntermediate[inter | | | | +Epoch 23 || mediate]/NNCFLinear[ | | | | +Epoch 23 || dense]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 3072] | 0.899 | 2.778 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[3]/Be | | | | +Epoch 23 || rtOutput[output]/NNC | | | | +Epoch 23 || FLinear[dense]/linea | | | | +Epoch 23 || r_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[4]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[qu | | | | +Epoch 23 || ery]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[4]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[ke | | | | +Epoch 23 || y]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[4]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[va | | | | +Epoch 23 || lue]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.625 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[4]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfOutput[ou | | | | +Epoch 23 || tput]/NNCFLinear[den | | | | +Epoch 23 || se]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[4]/Be | | | | +Epoch 23 || rtIntermediate[inter | | | | +Epoch 23 || mediate]/NNCFLinear[ | | | | +Epoch 23 || dense]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[4]/Be | | | | +Epoch 23 || rtOutput[output]/NNC | | | | +Epoch 23 || FLinear[dense]/linea | | | | +Epoch 23 || r_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[5]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[qu | | | | +Epoch 23 || ery]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[5]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[ke | | | | +Epoch 23 || y]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.617 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[5]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[va | | | | +Epoch 23 || lue]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.624 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[5]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfOutput[ou | | | | +Epoch 23 || tput]/NNCFLinear[den | | | | +Epoch 23 || se]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [3072, 768] | 0.892 | 2.778 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[5]/Be | | | | +Epoch 23 || rtIntermediate[inter | | | | +Epoch 23 || mediate]/NNCFLinear[ | | | | +Epoch 23 || dense]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[5]/Be | | | | +Epoch 23 || rtOutput[output]/NNC | | | | +Epoch 23 || FLinear[dense]/linea | | | | +Epoch 23 || r_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[6]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[qu | | | | +Epoch 23 || ery]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[6]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[ke | | | | +Epoch 23 || y]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.616 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[6]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[va | | | | +Epoch 23 || lue]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.624 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[6]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfOutput[ou | | | | +Epoch 23 || tput]/NNCFLinear[den | | | | +Epoch 23 || se]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[6]/Be | | | | +Epoch 23 || rtIntermediate[inter | | | | +Epoch 23 || mediate]/NNCFLinear[ | | | | +Epoch 23 || dense]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 3072] | 0.896 | 2.778 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[6]/Be | | | | +Epoch 23 || rtOutput[output]/NNC | | | | +Epoch 23 || FLinear[dense]/linea | | | | +Epoch 23 || r_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[7]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[qu | | | | +Epoch 23 || ery]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[7]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[ke | | | | +Epoch 23 || y]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[7]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[va | | | | +Epoch 23 || lue]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.618 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[7]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfOutput[ou | | | | +Epoch 23 || tput]/NNCFLinear[den | | | | +Epoch 23 || se]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[7]/Be | | | | +Epoch 23 || rtIntermediate[inter | | | | +Epoch 23 || mediate]/NNCFLinear[ | | | | +Epoch 23 || dense]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 3072] | 0.895 | 2.778 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[7]/Be | | | | +Epoch 23 || rtOutput[output]/NNC | | | | +Epoch 23 || FLinear[dense]/linea | | | | +Epoch 23 || r_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[8]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[qu | | | | +Epoch 23 || ery]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[8]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[ke | | | | +Epoch 23 || y]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[8]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[va | | | | +Epoch 23 || lue]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[8]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfOutput[ou | | | | +Epoch 23 || tput]/NNCFLinear[den | | | | +Epoch 23 || se]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[8]/Be | | | | +Epoch 23 || rtIntermediate[inter | | | | +Epoch 23 || mediate]/NNCFLinear[ | | | | +Epoch 23 || dense]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 3072] | 0.894 | 2.778 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[8]/Be | | | | +Epoch 23 || rtOutput[output]/NNC | | | | +Epoch 23 || FLinear[dense]/linea | | | | +Epoch 23 || r_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[9]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[qu | | | | +Epoch 23 || ery]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[9]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[ke | | | | +Epoch 23 || y]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.602 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[9]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfAttention | | | | +Epoch 23 || [self]/NNCFLinear[va | | | | +Epoch 23 || lue]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[9]/Be | | | | +Epoch 23 || rtAttention[attentio | | | | +Epoch 23 || n]/BertSelfOutput[ou | | | | +Epoch 23 || tput]/NNCFLinear[den | | | | +Epoch 23 || se]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [3072, 768] | 0.894 | 2.778 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[9]/Be | | | | +Epoch 23 || rtIntermediate[inter | | | | +Epoch 23 || mediate]/NNCFLinear[ | | | | +Epoch 23 || dense]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 3072] | 0.899 | 2.778 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[9]/Be | | | | +Epoch 23 || rtOutput[output]/NNC | | | | +Epoch 23 || FLinear[dense]/linea | | | | +Epoch 23 || r_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[10]/B | | | | +Epoch 23 || ertAttention[attenti | | | | +Epoch 23 || on]/BertSelfAttentio | | | | +Epoch 23 || n[self]/NNCFLinear[q | | | | +Epoch 23 || uery]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[10]/B | | | | +Epoch 23 || ertAttention[attenti | | | | +Epoch 23 || on]/BertSelfAttentio | | | | +Epoch 23 || n[self]/NNCFLinear[k | | | | +Epoch 23 || ey]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.611 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[10]/B | | | | +Epoch 23 || ertAttention[attenti | | | | +Epoch 23 || on]/BertSelfAttentio | | | | +Epoch 23 || n[self]/NNCFLinear[v | | | | +Epoch 23 || alue]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.607 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[10]/B | | | | +Epoch 23 || ertAttention[attenti | | | | +Epoch 23 || on]/BertSelfOutput[o | | | | +Epoch 23 || utput]/NNCFLinear[de | | | | +Epoch 23 || nse]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[10]/B | | | | +Epoch 23 || ertIntermediate[inte | | | | +Epoch 23 || rmediate]/NNCFLinear | | | | +Epoch 23 || [dense]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 3072] | 0.896 | 2.778 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[10]/B | | | | +Epoch 23 || ertOutput[output]/NN | | | | +Epoch 23 || CFLinear[dense]/line | | | | +Epoch 23 || ar_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.598 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[11]/B | | | | +Epoch 23 || ertAttention[attenti | | | | +Epoch 23 || on]/BertSelfAttentio | | | | +Epoch 23 || n[self]/NNCFLinear[q | | | | +Epoch 23 || uery]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.594 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[11]/B | | | | +Epoch 23 || ertAttention[attenti | | | | +Epoch 23 || on]/BertSelfAttentio | | | | +Epoch 23 || n[self]/NNCFLinear[k | | | | +Epoch 23 || ey]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[11]/B | | | | +Epoch 23 || ertAttention[attenti | | | | +Epoch 23 || on]/BertSelfAttentio | | | | +Epoch 23 || n[self]/NNCFLinear[v | | | | +Epoch 23 || alue]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[11]/B | | | | +Epoch 23 || ertAttention[attenti | | | | +Epoch 23 || on]/BertSelfOutput[o | | | | +Epoch 23 || utput]/NNCFLinear[de | | | | +Epoch 23 || nse]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [3072, 768] | 0.889 | 2.778 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[11]/B | | | | +Epoch 23 || ertIntermediate[inte | | | | +Epoch 23 || rmediate]/NNCFLinear | | | | +Epoch 23 || [dense]/linear_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 || BertForSequenceClass | [768, 3072] | 0.891 | 2.778 | +Epoch 23 || ification/BertModel[ | | | | +Epoch 23 || bert]/BertEncoder[en | | | | +Epoch 23 || coder]/ModuleList[la | | | | +Epoch 23 || yer]/BertLayer[11]/B | | | | +Epoch 23 || ertOutput[output]/NN | | | | +Epoch 23 || CFLinear[dense]/line | | | | +Epoch 23 || ar_0 | | | | +Epoch 23 |+----------------------+----------------+----------------+---------------------+ +Epoch 23 | +Epoch 23 |Statistics of the magnitude sparsity algorithm: +Epoch 23 |+----------------------------------------------------------------------+-------+ +Epoch 23 || Statistic's name | Value | +Epoch 23 |+======================================================================+=======+ +Epoch 23 || A target level of the sparsity for the algorithm for the current | 0.800 | +Epoch 23 || epoch | | +Epoch 23 |+----------------------------------------------------------------------+-------+ +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || Layer's name | Sparsity threshold | +Epoch 23 |+=========================================================+====================+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 23 || linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 23 || near_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 23 || linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 23 || inear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 23 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 23 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 23 || linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 23 || near_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 23 || linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 23 || inear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 23 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 23 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 23 || linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 23 || near_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 23 || linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 23 || inear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 23 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 23 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 23 || linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 23 || near_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 23 || linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 23 || inear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 23 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 23 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 23 || linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 23 || near_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 23 || linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 23 || inear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 23 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 23 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 23 || linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 23 || near_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 23 || linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 23 || inear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 23 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 23 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 23 || linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 23 || near_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 23 || linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 23 || inear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 23 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 23 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 23 || linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 23 || near_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 23 || linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 23 || inear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 23 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 23 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 23 || linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 23 || near_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 23 || linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 23 || inear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 23 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 23 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 23 || linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 23 || near_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 23 || linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 23 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 23 || inear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 23 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 23 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 23 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 23 || /linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 23 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 23 || inear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 23 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 23 || /linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 23 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 23 || linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 23 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 23 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 23 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 23 || /linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 23 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 23 || inear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 23 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 23 || /linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 23 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 23 || linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 23 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +Epoch 23 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 23 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 23 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 23 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 24 |+--------------------------------+-------+ +Epoch 24 || Statistic's name | Value | +Epoch 24 |+================================+=======+ +Epoch 24 || Ratio of enabled quantizations | 100 | +Epoch 24 |+--------------------------------+-------+ +Epoch 24 | +Epoch 24 |Statistics of the quantization share: +Epoch 24 |+----------------------------------+--------------------+ +Epoch 24 || Statistic's name | Value | +Epoch 24 |+==================================+====================+ +Epoch 24 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 24 |+----------------------------------+--------------------+ +Epoch 24 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 24 |+----------------------------------+--------------------+ +Epoch 24 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 24 |+----------------------------------+--------------------+ +Epoch 24 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 24 |+----------------------------------+--------------------+ +Epoch 24 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 24 |+----------------------------------+--------------------+ +Epoch 24 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 24 |+----------------------------------+--------------------+ +Epoch 24 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 24 |+----------------------------------+--------------------+ +Epoch 24 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 24 |+----------------------------------+--------------------+ +Epoch 24 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 24 |+----------------------------------+--------------------+ +Epoch 24 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 24 |+----------------------------------+--------------------+ +Epoch 24 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 24 |+----------------------------------+--------------------+ +Epoch 24 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 24 |+----------------------------------+--------------------+ +Epoch 24 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 24 |+----------------------------------+--------------------+ +Epoch 24 | +Epoch 24 |Statistics of the bitwidth distribution: +Epoch 24 |+--------------+---------------------+--------------------+--------------------+ +Epoch 24 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 24 || | WQs | Placed AQs | Qs | +Epoch 24 |+==============+=====================+====================+====================+ +Epoch 24 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 24 || | | | 173) | +Epoch 24 |+--------------+---------------------+--------------------+--------------------+ +Epoch 24 | +Epoch 24 |Statistics of the sparsified model: +Epoch 24 |+-----------------------------------------+-------+ +Epoch 24 || Statistic's name | Value | +Epoch 24 |+=========================================+=======+ +Epoch 24 || Sparsity level of the whole model | 0.621 | +Epoch 24 |+-----------------------------------------+-------+ +Epoch 24 || Sparsity level of all sparsified layers | 0.800 | +Epoch 24 |+-----------------------------------------+-------+ +Epoch 24 | +Epoch 24 |Statistics by sparsified layers: +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 24 |+======================+================+================+=====================+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.613 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[0]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[qu | | | | +Epoch 24 || ery]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.622 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[0]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[ke | | | | +Epoch 24 || y]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.623 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[0]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[va | | | | +Epoch 24 || lue]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.644 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[0]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfOutput[ou | | | | +Epoch 24 || tput]/NNCFLinear[den | | | | +Epoch 24 || se]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[0]/Be | | | | +Epoch 24 || rtIntermediate[inter | | | | +Epoch 24 || mediate]/NNCFLinear[ | | | | +Epoch 24 || dense]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[0]/Be | | | | +Epoch 24 || rtOutput[output]/NNC | | | | +Epoch 24 || FLinear[dense]/linea | | | | +Epoch 24 || r_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[1]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[qu | | | | +Epoch 24 || ery]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.613 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[1]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[ke | | | | +Epoch 24 || y]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.624 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[1]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[va | | | | +Epoch 24 || lue]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.645 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[1]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfOutput[ou | | | | +Epoch 24 || tput]/NNCFLinear[den | | | | +Epoch 24 || se]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[1]/Be | | | | +Epoch 24 || rtIntermediate[inter | | | | +Epoch 24 || mediate]/NNCFLinear[ | | | | +Epoch 24 || dense]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[1]/Be | | | | +Epoch 24 || rtOutput[output]/NNC | | | | +Epoch 24 || FLinear[dense]/linea | | | | +Epoch 24 || r_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.625 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[2]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[qu | | | | +Epoch 24 || ery]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.626 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[2]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[ke | | | | +Epoch 24 || y]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.627 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[2]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[va | | | | +Epoch 24 || lue]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.639 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[2]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfOutput[ou | | | | +Epoch 24 || tput]/NNCFLinear[den | | | | +Epoch 24 || se]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [3072, 768] | 0.892 | 2.778 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[2]/Be | | | | +Epoch 24 || rtIntermediate[inter | | | | +Epoch 24 || mediate]/NNCFLinear[ | | | | +Epoch 24 || dense]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[2]/Be | | | | +Epoch 24 || rtOutput[output]/NNC | | | | +Epoch 24 || FLinear[dense]/linea | | | | +Epoch 24 || r_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.607 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[3]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[qu | | | | +Epoch 24 || ery]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.609 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[3]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[ke | | | | +Epoch 24 || y]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.623 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[3]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[va | | | | +Epoch 24 || lue]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.633 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[3]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfOutput[ou | | | | +Epoch 24 || tput]/NNCFLinear[den | | | | +Epoch 24 || se]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [3072, 768] | 0.892 | 2.778 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[3]/Be | | | | +Epoch 24 || rtIntermediate[inter | | | | +Epoch 24 || mediate]/NNCFLinear[ | | | | +Epoch 24 || dense]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 3072] | 0.899 | 2.778 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[3]/Be | | | | +Epoch 24 || rtOutput[output]/NNC | | | | +Epoch 24 || FLinear[dense]/linea | | | | +Epoch 24 || r_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[4]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[qu | | | | +Epoch 24 || ery]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[4]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[ke | | | | +Epoch 24 || y]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.613 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[4]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[va | | | | +Epoch 24 || lue]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.627 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[4]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfOutput[ou | | | | +Epoch 24 || tput]/NNCFLinear[den | | | | +Epoch 24 || se]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[4]/Be | | | | +Epoch 24 || rtIntermediate[inter | | | | +Epoch 24 || mediate]/NNCFLinear[ | | | | +Epoch 24 || dense]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[4]/Be | | | | +Epoch 24 || rtOutput[output]/NNC | | | | +Epoch 24 || FLinear[dense]/linea | | | | +Epoch 24 || r_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[5]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[qu | | | | +Epoch 24 || ery]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[5]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[ke | | | | +Epoch 24 || y]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.618 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[5]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[va | | | | +Epoch 24 || lue]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.626 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[5]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfOutput[ou | | | | +Epoch 24 || tput]/NNCFLinear[den | | | | +Epoch 24 || se]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[5]/Be | | | | +Epoch 24 || rtIntermediate[inter | | | | +Epoch 24 || mediate]/NNCFLinear[ | | | | +Epoch 24 || dense]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[5]/Be | | | | +Epoch 24 || rtOutput[output]/NNC | | | | +Epoch 24 || FLinear[dense]/linea | | | | +Epoch 24 || r_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[6]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[qu | | | | +Epoch 24 || ery]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.602 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[6]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[ke | | | | +Epoch 24 || y]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.618 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[6]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[va | | | | +Epoch 24 || lue]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.626 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[6]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfOutput[ou | | | | +Epoch 24 || tput]/NNCFLinear[den | | | | +Epoch 24 || se]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[6]/Be | | | | +Epoch 24 || rtIntermediate[inter | | | | +Epoch 24 || mediate]/NNCFLinear[ | | | | +Epoch 24 || dense]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 3072] | 0.896 | 2.778 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[6]/Be | | | | +Epoch 24 || rtOutput[output]/NNC | | | | +Epoch 24 || FLinear[dense]/linea | | | | +Epoch 24 || r_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[7]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[qu | | | | +Epoch 24 || ery]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[7]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[ke | | | | +Epoch 24 || y]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.613 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[7]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[va | | | | +Epoch 24 || lue]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[7]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfOutput[ou | | | | +Epoch 24 || tput]/NNCFLinear[den | | | | +Epoch 24 || se]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[7]/Be | | | | +Epoch 24 || rtIntermediate[inter | | | | +Epoch 24 || mediate]/NNCFLinear[ | | | | +Epoch 24 || dense]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 3072] | 0.894 | 2.778 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[7]/Be | | | | +Epoch 24 || rtOutput[output]/NNC | | | | +Epoch 24 || FLinear[dense]/linea | | | | +Epoch 24 || r_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[8]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[qu | | | | +Epoch 24 || ery]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[8]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[ke | | | | +Epoch 24 || y]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[8]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[va | | | | +Epoch 24 || lue]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[8]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfOutput[ou | | | | +Epoch 24 || tput]/NNCFLinear[den | | | | +Epoch 24 || se]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [3072, 768] | 0.890 | 2.778 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[8]/Be | | | | +Epoch 24 || rtIntermediate[inter | | | | +Epoch 24 || mediate]/NNCFLinear[ | | | | +Epoch 24 || dense]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 3072] | 0.894 | 2.778 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[8]/Be | | | | +Epoch 24 || rtOutput[output]/NNC | | | | +Epoch 24 || FLinear[dense]/linea | | | | +Epoch 24 || r_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[9]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[qu | | | | +Epoch 24 || ery]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[9]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[ke | | | | +Epoch 24 || y]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[9]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfAttention | | | | +Epoch 24 || [self]/NNCFLinear[va | | | | +Epoch 24 || lue]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.607 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[9]/Be | | | | +Epoch 24 || rtAttention[attentio | | | | +Epoch 24 || n]/BertSelfOutput[ou | | | | +Epoch 24 || tput]/NNCFLinear[den | | | | +Epoch 24 || se]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [3072, 768] | 0.894 | 2.778 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[9]/Be | | | | +Epoch 24 || rtIntermediate[inter | | | | +Epoch 24 || mediate]/NNCFLinear[ | | | | +Epoch 24 || dense]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 3072] | 0.899 | 2.778 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[9]/Be | | | | +Epoch 24 || rtOutput[output]/NNC | | | | +Epoch 24 || FLinear[dense]/linea | | | | +Epoch 24 || r_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[10]/B | | | | +Epoch 24 || ertAttention[attenti | | | | +Epoch 24 || on]/BertSelfAttentio | | | | +Epoch 24 || n[self]/NNCFLinear[q | | | | +Epoch 24 || uery]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[10]/B | | | | +Epoch 24 || ertAttention[attenti | | | | +Epoch 24 || on]/BertSelfAttentio | | | | +Epoch 24 || n[self]/NNCFLinear[k | | | | +Epoch 24 || ey]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.613 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[10]/B | | | | +Epoch 24 || ertAttention[attenti | | | | +Epoch 24 || on]/BertSelfAttentio | | | | +Epoch 24 || n[self]/NNCFLinear[v | | | | +Epoch 24 || alue]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.608 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[10]/B | | | | +Epoch 24 || ertAttention[attenti | | | | +Epoch 24 || on]/BertSelfOutput[o | | | | +Epoch 24 || utput]/NNCFLinear[de | | | | +Epoch 24 || nse]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[10]/B | | | | +Epoch 24 || ertIntermediate[inte | | | | +Epoch 24 || rmediate]/NNCFLinear | | | | +Epoch 24 || [dense]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 3072] | 0.896 | 2.778 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[10]/B | | | | +Epoch 24 || ertOutput[output]/NN | | | | +Epoch 24 || CFLinear[dense]/line | | | | +Epoch 24 || ar_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[11]/B | | | | +Epoch 24 || ertAttention[attenti | | | | +Epoch 24 || on]/BertSelfAttentio | | | | +Epoch 24 || n[self]/NNCFLinear[q | | | | +Epoch 24 || uery]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[11]/B | | | | +Epoch 24 || ertAttention[attenti | | | | +Epoch 24 || on]/BertSelfAttentio | | | | +Epoch 24 || n[self]/NNCFLinear[k | | | | +Epoch 24 || ey]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[11]/B | | | | +Epoch 24 || ertAttention[attenti | | | | +Epoch 24 || on]/BertSelfAttentio | | | | +Epoch 24 || n[self]/NNCFLinear[v | | | | +Epoch 24 || alue]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 768] | 0.598 | 0.694 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[11]/B | | | | +Epoch 24 || ertAttention[attenti | | | | +Epoch 24 || on]/BertSelfOutput[o | | | | +Epoch 24 || utput]/NNCFLinear[de | | | | +Epoch 24 || nse]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [3072, 768] | 0.889 | 2.778 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[11]/B | | | | +Epoch 24 || ertIntermediate[inte | | | | +Epoch 24 || rmediate]/NNCFLinear | | | | +Epoch 24 || [dense]/linear_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 || BertForSequenceClass | [768, 3072] | 0.890 | 2.778 | +Epoch 24 || ification/BertModel[ | | | | +Epoch 24 || bert]/BertEncoder[en | | | | +Epoch 24 || coder]/ModuleList[la | | | | +Epoch 24 || yer]/BertLayer[11]/B | | | | +Epoch 24 || ertOutput[output]/NN | | | | +Epoch 24 || CFLinear[dense]/line | | | | +Epoch 24 || ar_0 | | | | +Epoch 24 |+----------------------+----------------+----------------+---------------------+ +Epoch 24 | +Epoch 24 |Statistics of the magnitude sparsity algorithm: +Epoch 24 |+----------------------------------------------------------------------+-------+ +Epoch 24 || Statistic's name | Value | +Epoch 24 |+======================================================================+=======+ +Epoch 24 || A target level of the sparsity for the algorithm for the current | 0.800 | +Epoch 24 || epoch | | +Epoch 24 |+----------------------------------------------------------------------+-------+ +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || Layer's name | Sparsity threshold | +Epoch 24 |+=========================================================+====================+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 24 || linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 24 || near_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 24 || linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 24 || inear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 24 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 24 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 24 || linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 24 || near_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 24 || linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 24 || inear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 24 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 24 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 24 || linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 24 || near_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 24 || linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 24 || inear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 24 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 24 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 24 || linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 24 || near_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 24 || linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 24 || inear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 24 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 24 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 24 || linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 24 || near_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 24 || linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 24 || inear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 24 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 24 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 24 || linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 24 || near_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 24 || linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 24 || inear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 24 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 24 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 24 || linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 24 || near_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 24 || linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 24 || inear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 24 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 24 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 24 || linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 24 || near_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 24 || linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 24 || inear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 24 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 24 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 24 || linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 24 || near_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 24 || linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 24 || inear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 24 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 24 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 24 || linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 24 || near_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 24 || linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 24 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 24 || inear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 24 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 24 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 24 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 24 || /linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 24 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 24 || inear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 24 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 24 || /linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 24 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 24 || linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 24 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 24 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 24 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 24 || /linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 24 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 24 || inear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 24 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 24 || /linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 24 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 24 || linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 24 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +Epoch 24 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 24 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 24 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 24 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 25 |+--------------------------------+-------+ +Epoch 25 || Statistic's name | Value | +Epoch 25 |+================================+=======+ +Epoch 25 || Ratio of enabled quantizations | 100 | +Epoch 25 |+--------------------------------+-------+ +Epoch 25 | +Epoch 25 |Statistics of the quantization share: +Epoch 25 |+----------------------------------+--------------------+ +Epoch 25 || Statistic's name | Value | +Epoch 25 |+==================================+====================+ +Epoch 25 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 25 |+----------------------------------+--------------------+ +Epoch 25 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 25 |+----------------------------------+--------------------+ +Epoch 25 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 25 |+----------------------------------+--------------------+ +Epoch 25 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 25 |+----------------------------------+--------------------+ +Epoch 25 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 25 |+----------------------------------+--------------------+ +Epoch 25 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 25 |+----------------------------------+--------------------+ +Epoch 25 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 25 |+----------------------------------+--------------------+ +Epoch 25 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 25 |+----------------------------------+--------------------+ +Epoch 25 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 25 |+----------------------------------+--------------------+ +Epoch 25 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 25 |+----------------------------------+--------------------+ +Epoch 25 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 25 |+----------------------------------+--------------------+ +Epoch 25 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 25 |+----------------------------------+--------------------+ +Epoch 25 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 25 |+----------------------------------+--------------------+ +Epoch 25 | +Epoch 25 |Statistics of the bitwidth distribution: +Epoch 25 |+--------------+---------------------+--------------------+--------------------+ +Epoch 25 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 25 || | WQs | Placed AQs | Qs | +Epoch 25 |+==============+=====================+====================+====================+ +Epoch 25 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 25 || | | | 173) | +Epoch 25 |+--------------+---------------------+--------------------+--------------------+ +Epoch 25 | +Epoch 25 |Statistics of the sparsified model: +Epoch 25 |+-----------------------------------------+-------+ +Epoch 25 || Statistic's name | Value | +Epoch 25 |+=========================================+=======+ +Epoch 25 || Sparsity level of the whole model | 0.621 | +Epoch 25 |+-----------------------------------------+-------+ +Epoch 25 || Sparsity level of all sparsified layers | 0.800 | +Epoch 25 |+-----------------------------------------+-------+ +Epoch 25 | +Epoch 25 |Statistics by sparsified layers: +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 25 |+======================+================+================+=====================+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[0]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[qu | | | | +Epoch 25 || ery]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.622 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[0]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[ke | | | | +Epoch 25 || y]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.623 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[0]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[va | | | | +Epoch 25 || lue]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.644 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[0]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfOutput[ou | | | | +Epoch 25 || tput]/NNCFLinear[den | | | | +Epoch 25 || se]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[0]/Be | | | | +Epoch 25 || rtIntermediate[inter | | | | +Epoch 25 || mediate]/NNCFLinear[ | | | | +Epoch 25 || dense]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[0]/Be | | | | +Epoch 25 || rtOutput[output]/NNC | | | | +Epoch 25 || FLinear[dense]/linea | | | | +Epoch 25 || r_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[1]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[qu | | | | +Epoch 25 || ery]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.613 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[1]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[ke | | | | +Epoch 25 || y]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.625 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[1]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[va | | | | +Epoch 25 || lue]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.646 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[1]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfOutput[ou | | | | +Epoch 25 || tput]/NNCFLinear[den | | | | +Epoch 25 || se]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[1]/Be | | | | +Epoch 25 || rtIntermediate[inter | | | | +Epoch 25 || mediate]/NNCFLinear[ | | | | +Epoch 25 || dense]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[1]/Be | | | | +Epoch 25 || rtOutput[output]/NNC | | | | +Epoch 25 || FLinear[dense]/linea | | | | +Epoch 25 || r_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.625 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[2]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[qu | | | | +Epoch 25 || ery]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.626 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[2]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[ke | | | | +Epoch 25 || y]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.628 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[2]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[va | | | | +Epoch 25 || lue]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.640 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[2]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfOutput[ou | | | | +Epoch 25 || tput]/NNCFLinear[den | | | | +Epoch 25 || se]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [3072, 768] | 0.892 | 2.778 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[2]/Be | | | | +Epoch 25 || rtIntermediate[inter | | | | +Epoch 25 || mediate]/NNCFLinear[ | | | | +Epoch 25 || dense]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[2]/Be | | | | +Epoch 25 || rtOutput[output]/NNC | | | | +Epoch 25 || FLinear[dense]/linea | | | | +Epoch 25 || r_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.607 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[3]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[qu | | | | +Epoch 25 || ery]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[3]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[ke | | | | +Epoch 25 || y]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.624 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[3]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[va | | | | +Epoch 25 || lue]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.634 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[3]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfOutput[ou | | | | +Epoch 25 || tput]/NNCFLinear[den | | | | +Epoch 25 || se]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [3072, 768] | 0.892 | 2.778 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[3]/Be | | | | +Epoch 25 || rtIntermediate[inter | | | | +Epoch 25 || mediate]/NNCFLinear[ | | | | +Epoch 25 || dense]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 3072] | 0.899 | 2.778 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[3]/Be | | | | +Epoch 25 || rtOutput[output]/NNC | | | | +Epoch 25 || FLinear[dense]/linea | | | | +Epoch 25 || r_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[4]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[qu | | | | +Epoch 25 || ery]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[4]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[ke | | | | +Epoch 25 || y]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[4]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[va | | | | +Epoch 25 || lue]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.627 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[4]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfOutput[ou | | | | +Epoch 25 || tput]/NNCFLinear[den | | | | +Epoch 25 || se]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[4]/Be | | | | +Epoch 25 || rtIntermediate[inter | | | | +Epoch 25 || mediate]/NNCFLinear[ | | | | +Epoch 25 || dense]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[4]/Be | | | | +Epoch 25 || rtOutput[output]/NNC | | | | +Epoch 25 || FLinear[dense]/linea | | | | +Epoch 25 || r_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[5]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[qu | | | | +Epoch 25 || ery]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[5]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[ke | | | | +Epoch 25 || y]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.618 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[5]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[va | | | | +Epoch 25 || lue]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.626 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[5]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfOutput[ou | | | | +Epoch 25 || tput]/NNCFLinear[den | | | | +Epoch 25 || se]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[5]/Be | | | | +Epoch 25 || rtIntermediate[inter | | | | +Epoch 25 || mediate]/NNCFLinear[ | | | | +Epoch 25 || dense]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[5]/Be | | | | +Epoch 25 || rtOutput[output]/NNC | | | | +Epoch 25 || FLinear[dense]/linea | | | | +Epoch 25 || r_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[6]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[qu | | | | +Epoch 25 || ery]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.602 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[6]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[ke | | | | +Epoch 25 || y]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.618 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[6]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[va | | | | +Epoch 25 || lue]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.626 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[6]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfOutput[ou | | | | +Epoch 25 || tput]/NNCFLinear[den | | | | +Epoch 25 || se]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[6]/Be | | | | +Epoch 25 || rtIntermediate[inter | | | | +Epoch 25 || mediate]/NNCFLinear[ | | | | +Epoch 25 || dense]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 3072] | 0.896 | 2.778 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[6]/Be | | | | +Epoch 25 || rtOutput[output]/NNC | | | | +Epoch 25 || FLinear[dense]/linea | | | | +Epoch 25 || r_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[7]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[qu | | | | +Epoch 25 || ery]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[7]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[ke | | | | +Epoch 25 || y]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[7]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[va | | | | +Epoch 25 || lue]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[7]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfOutput[ou | | | | +Epoch 25 || tput]/NNCFLinear[den | | | | +Epoch 25 || se]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[7]/Be | | | | +Epoch 25 || rtIntermediate[inter | | | | +Epoch 25 || mediate]/NNCFLinear[ | | | | +Epoch 25 || dense]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 3072] | 0.894 | 2.778 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[7]/Be | | | | +Epoch 25 || rtOutput[output]/NNC | | | | +Epoch 25 || FLinear[dense]/linea | | | | +Epoch 25 || r_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[8]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[qu | | | | +Epoch 25 || ery]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[8]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[ke | | | | +Epoch 25 || y]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.606 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[8]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[va | | | | +Epoch 25 || lue]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[8]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfOutput[ou | | | | +Epoch 25 || tput]/NNCFLinear[den | | | | +Epoch 25 || se]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [3072, 768] | 0.890 | 2.778 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[8]/Be | | | | +Epoch 25 || rtIntermediate[inter | | | | +Epoch 25 || mediate]/NNCFLinear[ | | | | +Epoch 25 || dense]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 3072] | 0.894 | 2.778 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[8]/Be | | | | +Epoch 25 || rtOutput[output]/NNC | | | | +Epoch 25 || FLinear[dense]/linea | | | | +Epoch 25 || r_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[9]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[qu | | | | +Epoch 25 || ery]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.598 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[9]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[ke | | | | +Epoch 25 || y]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[9]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfAttention | | | | +Epoch 25 || [self]/NNCFLinear[va | | | | +Epoch 25 || lue]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.607 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[9]/Be | | | | +Epoch 25 || rtAttention[attentio | | | | +Epoch 25 || n]/BertSelfOutput[ou | | | | +Epoch 25 || tput]/NNCFLinear[den | | | | +Epoch 25 || se]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [3072, 768] | 0.894 | 2.778 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[9]/Be | | | | +Epoch 25 || rtIntermediate[inter | | | | +Epoch 25 || mediate]/NNCFLinear[ | | | | +Epoch 25 || dense]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 3072] | 0.899 | 2.778 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[9]/Be | | | | +Epoch 25 || rtOutput[output]/NNC | | | | +Epoch 25 || FLinear[dense]/linea | | | | +Epoch 25 || r_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[10]/B | | | | +Epoch 25 || ertAttention[attenti | | | | +Epoch 25 || on]/BertSelfAttentio | | | | +Epoch 25 || n[self]/NNCFLinear[q | | | | +Epoch 25 || uery]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[10]/B | | | | +Epoch 25 || ertAttention[attenti | | | | +Epoch 25 || on]/BertSelfAttentio | | | | +Epoch 25 || n[self]/NNCFLinear[k | | | | +Epoch 25 || ey]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.613 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[10]/B | | | | +Epoch 25 || ertAttention[attenti | | | | +Epoch 25 || on]/BertSelfAttentio | | | | +Epoch 25 || n[self]/NNCFLinear[v | | | | +Epoch 25 || alue]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.609 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[10]/B | | | | +Epoch 25 || ertAttention[attenti | | | | +Epoch 25 || on]/BertSelfOutput[o | | | | +Epoch 25 || utput]/NNCFLinear[de | | | | +Epoch 25 || nse]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[10]/B | | | | +Epoch 25 || ertIntermediate[inte | | | | +Epoch 25 || rmediate]/NNCFLinear | | | | +Epoch 25 || [dense]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 3072] | 0.896 | 2.778 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[10]/B | | | | +Epoch 25 || ertOutput[output]/NN | | | | +Epoch 25 || CFLinear[dense]/line | | | | +Epoch 25 || ar_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[11]/B | | | | +Epoch 25 || ertAttention[attenti | | | | +Epoch 25 || on]/BertSelfAttentio | | | | +Epoch 25 || n[self]/NNCFLinear[q | | | | +Epoch 25 || uery]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[11]/B | | | | +Epoch 25 || ertAttention[attenti | | | | +Epoch 25 || on]/BertSelfAttentio | | | | +Epoch 25 || n[self]/NNCFLinear[k | | | | +Epoch 25 || ey]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[11]/B | | | | +Epoch 25 || ertAttention[attenti | | | | +Epoch 25 || on]/BertSelfAttentio | | | | +Epoch 25 || n[self]/NNCFLinear[v | | | | +Epoch 25 || alue]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 768] | 0.598 | 0.694 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[11]/B | | | | +Epoch 25 || ertAttention[attenti | | | | +Epoch 25 || on]/BertSelfOutput[o | | | | +Epoch 25 || utput]/NNCFLinear[de | | | | +Epoch 25 || nse]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [3072, 768] | 0.889 | 2.778 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[11]/B | | | | +Epoch 25 || ertIntermediate[inte | | | | +Epoch 25 || rmediate]/NNCFLinear | | | | +Epoch 25 || [dense]/linear_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 || BertForSequenceClass | [768, 3072] | 0.890 | 2.778 | +Epoch 25 || ification/BertModel[ | | | | +Epoch 25 || bert]/BertEncoder[en | | | | +Epoch 25 || coder]/ModuleList[la | | | | +Epoch 25 || yer]/BertLayer[11]/B | | | | +Epoch 25 || ertOutput[output]/NN | | | | +Epoch 25 || CFLinear[dense]/line | | | | +Epoch 25 || ar_0 | | | | +Epoch 25 |+----------------------+----------------+----------------+---------------------+ +Epoch 25 | +Epoch 25 |Statistics of the magnitude sparsity algorithm: +Epoch 25 |+----------------------------------------------------------------------+-------+ +Epoch 25 || Statistic's name | Value | +Epoch 25 |+======================================================================+=======+ +Epoch 25 || A target level of the sparsity for the algorithm for the current | 0.800 | +Epoch 25 || epoch | | +Epoch 25 |+----------------------------------------------------------------------+-------+ +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || Layer's name | Sparsity threshold | +Epoch 25 |+=========================================================+====================+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 25 || linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 25 || near_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 25 || linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 25 || inear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 25 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 25 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 25 || linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 25 || near_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 25 || linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 25 || inear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 25 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 25 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 25 || linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 25 || near_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 25 || linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 25 || inear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 25 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 25 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 25 || linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 25 || near_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 25 || linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 25 || inear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 25 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 25 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 25 || linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 25 || near_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 25 || linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 25 || inear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 25 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 25 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 25 || linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 25 || near_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 25 || linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 25 || inear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 25 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 25 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 25 || linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 25 || near_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 25 || linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 25 || inear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 25 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 25 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 25 || linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 25 || near_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 25 || linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 25 || inear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 25 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 25 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 25 || linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 25 || near_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 25 || linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 25 || inear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 25 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 25 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 25 || linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 25 || near_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 25 || linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 25 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 25 || inear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 25 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 25 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 25 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 25 || /linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 25 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 25 || inear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 25 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 25 || /linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 25 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 25 || linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 25 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 25 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 25 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 25 || /linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 25 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 25 || inear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 25 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 25 || /linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 25 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 25 || linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 25 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +Epoch 25 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 25 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 25 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 25 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 26 |+--------------------------------+-------+ +Epoch 26 || Statistic's name | Value | +Epoch 26 |+================================+=======+ +Epoch 26 || Ratio of enabled quantizations | 100 | +Epoch 26 |+--------------------------------+-------+ +Epoch 26 | +Epoch 26 |Statistics of the quantization share: +Epoch 26 |+----------------------------------+--------------------+ +Epoch 26 || Statistic's name | Value | +Epoch 26 |+==================================+====================+ +Epoch 26 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 26 |+----------------------------------+--------------------+ +Epoch 26 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 26 |+----------------------------------+--------------------+ +Epoch 26 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 26 |+----------------------------------+--------------------+ +Epoch 26 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 26 |+----------------------------------+--------------------+ +Epoch 26 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 26 |+----------------------------------+--------------------+ +Epoch 26 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 26 |+----------------------------------+--------------------+ +Epoch 26 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 26 |+----------------------------------+--------------------+ +Epoch 26 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 26 |+----------------------------------+--------------------+ +Epoch 26 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 26 |+----------------------------------+--------------------+ +Epoch 26 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 26 |+----------------------------------+--------------------+ +Epoch 26 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 26 |+----------------------------------+--------------------+ +Epoch 26 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 26 |+----------------------------------+--------------------+ +Epoch 26 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 26 |+----------------------------------+--------------------+ +Epoch 26 | +Epoch 26 |Statistics of the bitwidth distribution: +Epoch 26 |+--------------+---------------------+--------------------+--------------------+ +Epoch 26 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 26 || | WQs | Placed AQs | Qs | +Epoch 26 |+==============+=====================+====================+====================+ +Epoch 26 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 26 || | | | 173) | +Epoch 26 |+--------------+---------------------+--------------------+--------------------+ +Epoch 26 | +Epoch 26 |Statistics of the sparsified model: +Epoch 26 |+-----------------------------------------+-------+ +Epoch 26 || Statistic's name | Value | +Epoch 26 |+=========================================+=======+ +Epoch 26 || Sparsity level of the whole model | 0.621 | +Epoch 26 |+-----------------------------------------+-------+ +Epoch 26 || Sparsity level of all sparsified layers | 0.800 | +Epoch 26 |+-----------------------------------------+-------+ +Epoch 26 | +Epoch 26 |Statistics by sparsified layers: +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 26 |+======================+================+================+=====================+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[0]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[qu | | | | +Epoch 26 || ery]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.622 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[0]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[ke | | | | +Epoch 26 || y]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.623 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[0]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[va | | | | +Epoch 26 || lue]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.644 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[0]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfOutput[ou | | | | +Epoch 26 || tput]/NNCFLinear[den | | | | +Epoch 26 || se]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[0]/Be | | | | +Epoch 26 || rtIntermediate[inter | | | | +Epoch 26 || mediate]/NNCFLinear[ | | | | +Epoch 26 || dense]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[0]/Be | | | | +Epoch 26 || rtOutput[output]/NNC | | | | +Epoch 26 || FLinear[dense]/linea | | | | +Epoch 26 || r_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[1]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[qu | | | | +Epoch 26 || ery]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.613 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[1]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[ke | | | | +Epoch 26 || y]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.625 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[1]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[va | | | | +Epoch 26 || lue]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.646 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[1]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfOutput[ou | | | | +Epoch 26 || tput]/NNCFLinear[den | | | | +Epoch 26 || se]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[1]/Be | | | | +Epoch 26 || rtIntermediate[inter | | | | +Epoch 26 || mediate]/NNCFLinear[ | | | | +Epoch 26 || dense]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[1]/Be | | | | +Epoch 26 || rtOutput[output]/NNC | | | | +Epoch 26 || FLinear[dense]/linea | | | | +Epoch 26 || r_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.625 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[2]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[qu | | | | +Epoch 26 || ery]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.626 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[2]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[ke | | | | +Epoch 26 || y]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.628 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[2]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[va | | | | +Epoch 26 || lue]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.640 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[2]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfOutput[ou | | | | +Epoch 26 || tput]/NNCFLinear[den | | | | +Epoch 26 || se]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [3072, 768] | 0.892 | 2.778 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[2]/Be | | | | +Epoch 26 || rtIntermediate[inter | | | | +Epoch 26 || mediate]/NNCFLinear[ | | | | +Epoch 26 || dense]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[2]/Be | | | | +Epoch 26 || rtOutput[output]/NNC | | | | +Epoch 26 || FLinear[dense]/linea | | | | +Epoch 26 || r_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.607 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[3]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[qu | | | | +Epoch 26 || ery]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[3]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[ke | | | | +Epoch 26 || y]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.624 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[3]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[va | | | | +Epoch 26 || lue]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.634 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[3]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfOutput[ou | | | | +Epoch 26 || tput]/NNCFLinear[den | | | | +Epoch 26 || se]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [3072, 768] | 0.892 | 2.778 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[3]/Be | | | | +Epoch 26 || rtIntermediate[inter | | | | +Epoch 26 || mediate]/NNCFLinear[ | | | | +Epoch 26 || dense]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 3072] | 0.899 | 2.778 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[3]/Be | | | | +Epoch 26 || rtOutput[output]/NNC | | | | +Epoch 26 || FLinear[dense]/linea | | | | +Epoch 26 || r_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[4]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[qu | | | | +Epoch 26 || ery]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[4]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[ke | | | | +Epoch 26 || y]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[4]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[va | | | | +Epoch 26 || lue]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.627 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[4]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfOutput[ou | | | | +Epoch 26 || tput]/NNCFLinear[den | | | | +Epoch 26 || se]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[4]/Be | | | | +Epoch 26 || rtIntermediate[inter | | | | +Epoch 26 || mediate]/NNCFLinear[ | | | | +Epoch 26 || dense]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[4]/Be | | | | +Epoch 26 || rtOutput[output]/NNC | | | | +Epoch 26 || FLinear[dense]/linea | | | | +Epoch 26 || r_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[5]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[qu | | | | +Epoch 26 || ery]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[5]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[ke | | | | +Epoch 26 || y]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.618 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[5]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[va | | | | +Epoch 26 || lue]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.626 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[5]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfOutput[ou | | | | +Epoch 26 || tput]/NNCFLinear[den | | | | +Epoch 26 || se]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[5]/Be | | | | +Epoch 26 || rtIntermediate[inter | | | | +Epoch 26 || mediate]/NNCFLinear[ | | | | +Epoch 26 || dense]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[5]/Be | | | | +Epoch 26 || rtOutput[output]/NNC | | | | +Epoch 26 || FLinear[dense]/linea | | | | +Epoch 26 || r_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[6]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[qu | | | | +Epoch 26 || ery]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.602 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[6]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[ke | | | | +Epoch 26 || y]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.618 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[6]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[va | | | | +Epoch 26 || lue]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.626 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[6]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfOutput[ou | | | | +Epoch 26 || tput]/NNCFLinear[den | | | | +Epoch 26 || se]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[6]/Be | | | | +Epoch 26 || rtIntermediate[inter | | | | +Epoch 26 || mediate]/NNCFLinear[ | | | | +Epoch 26 || dense]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 3072] | 0.896 | 2.778 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[6]/Be | | | | +Epoch 26 || rtOutput[output]/NNC | | | | +Epoch 26 || FLinear[dense]/linea | | | | +Epoch 26 || r_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[7]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[qu | | | | +Epoch 26 || ery]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[7]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[ke | | | | +Epoch 26 || y]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[7]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[va | | | | +Epoch 26 || lue]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[7]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfOutput[ou | | | | +Epoch 26 || tput]/NNCFLinear[den | | | | +Epoch 26 || se]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[7]/Be | | | | +Epoch 26 || rtIntermediate[inter | | | | +Epoch 26 || mediate]/NNCFLinear[ | | | | +Epoch 26 || dense]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 3072] | 0.894 | 2.778 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[7]/Be | | | | +Epoch 26 || rtOutput[output]/NNC | | | | +Epoch 26 || FLinear[dense]/linea | | | | +Epoch 26 || r_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[8]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[qu | | | | +Epoch 26 || ery]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[8]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[ke | | | | +Epoch 26 || y]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.606 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[8]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[va | | | | +Epoch 26 || lue]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[8]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfOutput[ou | | | | +Epoch 26 || tput]/NNCFLinear[den | | | | +Epoch 26 || se]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [3072, 768] | 0.890 | 2.778 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[8]/Be | | | | +Epoch 26 || rtIntermediate[inter | | | | +Epoch 26 || mediate]/NNCFLinear[ | | | | +Epoch 26 || dense]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 3072] | 0.894 | 2.778 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[8]/Be | | | | +Epoch 26 || rtOutput[output]/NNC | | | | +Epoch 26 || FLinear[dense]/linea | | | | +Epoch 26 || r_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[9]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[qu | | | | +Epoch 26 || ery]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.598 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[9]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[ke | | | | +Epoch 26 || y]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[9]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfAttention | | | | +Epoch 26 || [self]/NNCFLinear[va | | | | +Epoch 26 || lue]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.607 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[9]/Be | | | | +Epoch 26 || rtAttention[attentio | | | | +Epoch 26 || n]/BertSelfOutput[ou | | | | +Epoch 26 || tput]/NNCFLinear[den | | | | +Epoch 26 || se]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [3072, 768] | 0.894 | 2.778 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[9]/Be | | | | +Epoch 26 || rtIntermediate[inter | | | | +Epoch 26 || mediate]/NNCFLinear[ | | | | +Epoch 26 || dense]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 3072] | 0.899 | 2.778 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[9]/Be | | | | +Epoch 26 || rtOutput[output]/NNC | | | | +Epoch 26 || FLinear[dense]/linea | | | | +Epoch 26 || r_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[10]/B | | | | +Epoch 26 || ertAttention[attenti | | | | +Epoch 26 || on]/BertSelfAttentio | | | | +Epoch 26 || n[self]/NNCFLinear[q | | | | +Epoch 26 || uery]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[10]/B | | | | +Epoch 26 || ertAttention[attenti | | | | +Epoch 26 || on]/BertSelfAttentio | | | | +Epoch 26 || n[self]/NNCFLinear[k | | | | +Epoch 26 || ey]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.613 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[10]/B | | | | +Epoch 26 || ertAttention[attenti | | | | +Epoch 26 || on]/BertSelfAttentio | | | | +Epoch 26 || n[self]/NNCFLinear[v | | | | +Epoch 26 || alue]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.609 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[10]/B | | | | +Epoch 26 || ertAttention[attenti | | | | +Epoch 26 || on]/BertSelfOutput[o | | | | +Epoch 26 || utput]/NNCFLinear[de | | | | +Epoch 26 || nse]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[10]/B | | | | +Epoch 26 || ertIntermediate[inte | | | | +Epoch 26 || rmediate]/NNCFLinear | | | | +Epoch 26 || [dense]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 3072] | 0.896 | 2.778 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[10]/B | | | | +Epoch 26 || ertOutput[output]/NN | | | | +Epoch 26 || CFLinear[dense]/line | | | | +Epoch 26 || ar_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[11]/B | | | | +Epoch 26 || ertAttention[attenti | | | | +Epoch 26 || on]/BertSelfAttentio | | | | +Epoch 26 || n[self]/NNCFLinear[q | | | | +Epoch 26 || uery]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[11]/B | | | | +Epoch 26 || ertAttention[attenti | | | | +Epoch 26 || on]/BertSelfAttentio | | | | +Epoch 26 || n[self]/NNCFLinear[k | | | | +Epoch 26 || ey]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[11]/B | | | | +Epoch 26 || ertAttention[attenti | | | | +Epoch 26 || on]/BertSelfAttentio | | | | +Epoch 26 || n[self]/NNCFLinear[v | | | | +Epoch 26 || alue]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 768] | 0.598 | 0.694 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[11]/B | | | | +Epoch 26 || ertAttention[attenti | | | | +Epoch 26 || on]/BertSelfOutput[o | | | | +Epoch 26 || utput]/NNCFLinear[de | | | | +Epoch 26 || nse]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [3072, 768] | 0.889 | 2.778 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[11]/B | | | | +Epoch 26 || ertIntermediate[inte | | | | +Epoch 26 || rmediate]/NNCFLinear | | | | +Epoch 26 || [dense]/linear_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 || BertForSequenceClass | [768, 3072] | 0.890 | 2.778 | +Epoch 26 || ification/BertModel[ | | | | +Epoch 26 || bert]/BertEncoder[en | | | | +Epoch 26 || coder]/ModuleList[la | | | | +Epoch 26 || yer]/BertLayer[11]/B | | | | +Epoch 26 || ertOutput[output]/NN | | | | +Epoch 26 || CFLinear[dense]/line | | | | +Epoch 26 || ar_0 | | | | +Epoch 26 |+----------------------+----------------+----------------+---------------------+ +Epoch 26 | +Epoch 26 |Statistics of the magnitude sparsity algorithm: +Epoch 26 |+----------------------------------------------------------------------+-------+ +Epoch 26 || Statistic's name | Value | +Epoch 26 |+======================================================================+=======+ +Epoch 26 || A target level of the sparsity for the algorithm for the current | 0.800 | +Epoch 26 || epoch | | +Epoch 26 |+----------------------------------------------------------------------+-------+ +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || Layer's name | Sparsity threshold | +Epoch 26 |+=========================================================+====================+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 26 || linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 26 || near_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 26 || linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 26 || inear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 26 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 26 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 26 || linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 26 || near_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 26 || linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 26 || inear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 26 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 26 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 26 || linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 26 || near_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 26 || linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 26 || inear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 26 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 26 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 26 || linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 26 || near_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 26 || linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 26 || inear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 26 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 26 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 26 || linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 26 || near_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 26 || linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 26 || inear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 26 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 26 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 26 || linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 26 || near_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 26 || linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 26 || inear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 26 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 26 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 26 || linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 26 || near_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 26 || linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 26 || inear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 26 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 26 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 26 || linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 26 || near_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 26 || linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 26 || inear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 26 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 26 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 26 || linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 26 || near_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 26 || linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 26 || inear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 26 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 26 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 26 || linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 26 || near_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 26 || linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 26 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 26 || inear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 26 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 26 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 26 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 26 || /linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 26 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 26 || inear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 26 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 26 || /linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 26 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 26 || linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 26 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 26 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 26 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 26 || /linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 26 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 26 || inear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 26 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 26 || /linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 26 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 26 || linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 26 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +Epoch 26 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 26 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 26 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 26 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 27 |+--------------------------------+-------+ +Epoch 27 || Statistic's name | Value | +Epoch 27 |+================================+=======+ +Epoch 27 || Ratio of enabled quantizations | 100 | +Epoch 27 |+--------------------------------+-------+ +Epoch 27 | +Epoch 27 |Statistics of the quantization share: +Epoch 27 |+----------------------------------+--------------------+ +Epoch 27 || Statistic's name | Value | +Epoch 27 |+==================================+====================+ +Epoch 27 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 27 |+----------------------------------+--------------------+ +Epoch 27 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 27 |+----------------------------------+--------------------+ +Epoch 27 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 27 |+----------------------------------+--------------------+ +Epoch 27 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 27 |+----------------------------------+--------------------+ +Epoch 27 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 27 |+----------------------------------+--------------------+ +Epoch 27 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 27 |+----------------------------------+--------------------+ +Epoch 27 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 27 |+----------------------------------+--------------------+ +Epoch 27 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 27 |+----------------------------------+--------------------+ +Epoch 27 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 27 |+----------------------------------+--------------------+ +Epoch 27 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 27 |+----------------------------------+--------------------+ +Epoch 27 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 27 |+----------------------------------+--------------------+ +Epoch 27 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 27 |+----------------------------------+--------------------+ +Epoch 27 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 27 |+----------------------------------+--------------------+ +Epoch 27 | +Epoch 27 |Statistics of the bitwidth distribution: +Epoch 27 |+--------------+---------------------+--------------------+--------------------+ +Epoch 27 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 27 || | WQs | Placed AQs | Qs | +Epoch 27 |+==============+=====================+====================+====================+ +Epoch 27 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 27 || | | | 173) | +Epoch 27 |+--------------+---------------------+--------------------+--------------------+ +Epoch 27 | +Epoch 27 |Statistics of the sparsified model: +Epoch 27 |+-----------------------------------------+-------+ +Epoch 27 || Statistic's name | Value | +Epoch 27 |+=========================================+=======+ +Epoch 27 || Sparsity level of the whole model | 0.621 | +Epoch 27 |+-----------------------------------------+-------+ +Epoch 27 || Sparsity level of all sparsified layers | 0.800 | +Epoch 27 |+-----------------------------------------+-------+ +Epoch 27 | +Epoch 27 |Statistics by sparsified layers: +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 27 |+======================+================+================+=====================+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[0]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[qu | | | | +Epoch 27 || ery]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.622 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[0]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[ke | | | | +Epoch 27 || y]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.623 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[0]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[va | | | | +Epoch 27 || lue]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.644 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[0]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfOutput[ou | | | | +Epoch 27 || tput]/NNCFLinear[den | | | | +Epoch 27 || se]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[0]/Be | | | | +Epoch 27 || rtIntermediate[inter | | | | +Epoch 27 || mediate]/NNCFLinear[ | | | | +Epoch 27 || dense]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[0]/Be | | | | +Epoch 27 || rtOutput[output]/NNC | | | | +Epoch 27 || FLinear[dense]/linea | | | | +Epoch 27 || r_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[1]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[qu | | | | +Epoch 27 || ery]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.613 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[1]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[ke | | | | +Epoch 27 || y]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.625 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[1]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[va | | | | +Epoch 27 || lue]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.646 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[1]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfOutput[ou | | | | +Epoch 27 || tput]/NNCFLinear[den | | | | +Epoch 27 || se]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[1]/Be | | | | +Epoch 27 || rtIntermediate[inter | | | | +Epoch 27 || mediate]/NNCFLinear[ | | | | +Epoch 27 || dense]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[1]/Be | | | | +Epoch 27 || rtOutput[output]/NNC | | | | +Epoch 27 || FLinear[dense]/linea | | | | +Epoch 27 || r_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.625 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[2]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[qu | | | | +Epoch 27 || ery]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.626 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[2]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[ke | | | | +Epoch 27 || y]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.628 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[2]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[va | | | | +Epoch 27 || lue]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.640 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[2]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfOutput[ou | | | | +Epoch 27 || tput]/NNCFLinear[den | | | | +Epoch 27 || se]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [3072, 768] | 0.892 | 2.778 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[2]/Be | | | | +Epoch 27 || rtIntermediate[inter | | | | +Epoch 27 || mediate]/NNCFLinear[ | | | | +Epoch 27 || dense]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[2]/Be | | | | +Epoch 27 || rtOutput[output]/NNC | | | | +Epoch 27 || FLinear[dense]/linea | | | | +Epoch 27 || r_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.607 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[3]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[qu | | | | +Epoch 27 || ery]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[3]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[ke | | | | +Epoch 27 || y]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.624 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[3]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[va | | | | +Epoch 27 || lue]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.634 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[3]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfOutput[ou | | | | +Epoch 27 || tput]/NNCFLinear[den | | | | +Epoch 27 || se]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [3072, 768] | 0.892 | 2.778 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[3]/Be | | | | +Epoch 27 || rtIntermediate[inter | | | | +Epoch 27 || mediate]/NNCFLinear[ | | | | +Epoch 27 || dense]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 3072] | 0.899 | 2.778 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[3]/Be | | | | +Epoch 27 || rtOutput[output]/NNC | | | | +Epoch 27 || FLinear[dense]/linea | | | | +Epoch 27 || r_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[4]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[qu | | | | +Epoch 27 || ery]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[4]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[ke | | | | +Epoch 27 || y]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[4]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[va | | | | +Epoch 27 || lue]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.627 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[4]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfOutput[ou | | | | +Epoch 27 || tput]/NNCFLinear[den | | | | +Epoch 27 || se]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[4]/Be | | | | +Epoch 27 || rtIntermediate[inter | | | | +Epoch 27 || mediate]/NNCFLinear[ | | | | +Epoch 27 || dense]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[4]/Be | | | | +Epoch 27 || rtOutput[output]/NNC | | | | +Epoch 27 || FLinear[dense]/linea | | | | +Epoch 27 || r_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[5]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[qu | | | | +Epoch 27 || ery]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[5]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[ke | | | | +Epoch 27 || y]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.618 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[5]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[va | | | | +Epoch 27 || lue]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.626 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[5]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfOutput[ou | | | | +Epoch 27 || tput]/NNCFLinear[den | | | | +Epoch 27 || se]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[5]/Be | | | | +Epoch 27 || rtIntermediate[inter | | | | +Epoch 27 || mediate]/NNCFLinear[ | | | | +Epoch 27 || dense]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[5]/Be | | | | +Epoch 27 || rtOutput[output]/NNC | | | | +Epoch 27 || FLinear[dense]/linea | | | | +Epoch 27 || r_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[6]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[qu | | | | +Epoch 27 || ery]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.602 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[6]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[ke | | | | +Epoch 27 || y]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.618 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[6]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[va | | | | +Epoch 27 || lue]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.626 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[6]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfOutput[ou | | | | +Epoch 27 || tput]/NNCFLinear[den | | | | +Epoch 27 || se]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[6]/Be | | | | +Epoch 27 || rtIntermediate[inter | | | | +Epoch 27 || mediate]/NNCFLinear[ | | | | +Epoch 27 || dense]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 3072] | 0.896 | 2.778 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[6]/Be | | | | +Epoch 27 || rtOutput[output]/NNC | | | | +Epoch 27 || FLinear[dense]/linea | | | | +Epoch 27 || r_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[7]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[qu | | | | +Epoch 27 || ery]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[7]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[ke | | | | +Epoch 27 || y]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[7]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[va | | | | +Epoch 27 || lue]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[7]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfOutput[ou | | | | +Epoch 27 || tput]/NNCFLinear[den | | | | +Epoch 27 || se]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[7]/Be | | | | +Epoch 27 || rtIntermediate[inter | | | | +Epoch 27 || mediate]/NNCFLinear[ | | | | +Epoch 27 || dense]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 3072] | 0.894 | 2.778 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[7]/Be | | | | +Epoch 27 || rtOutput[output]/NNC | | | | +Epoch 27 || FLinear[dense]/linea | | | | +Epoch 27 || r_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[8]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[qu | | | | +Epoch 27 || ery]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[8]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[ke | | | | +Epoch 27 || y]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.606 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[8]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[va | | | | +Epoch 27 || lue]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[8]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfOutput[ou | | | | +Epoch 27 || tput]/NNCFLinear[den | | | | +Epoch 27 || se]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [3072, 768] | 0.890 | 2.778 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[8]/Be | | | | +Epoch 27 || rtIntermediate[inter | | | | +Epoch 27 || mediate]/NNCFLinear[ | | | | +Epoch 27 || dense]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 3072] | 0.894 | 2.778 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[8]/Be | | | | +Epoch 27 || rtOutput[output]/NNC | | | | +Epoch 27 || FLinear[dense]/linea | | | | +Epoch 27 || r_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[9]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[qu | | | | +Epoch 27 || ery]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.598 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[9]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[ke | | | | +Epoch 27 || y]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[9]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfAttention | | | | +Epoch 27 || [self]/NNCFLinear[va | | | | +Epoch 27 || lue]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.607 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[9]/Be | | | | +Epoch 27 || rtAttention[attentio | | | | +Epoch 27 || n]/BertSelfOutput[ou | | | | +Epoch 27 || tput]/NNCFLinear[den | | | | +Epoch 27 || se]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [3072, 768] | 0.894 | 2.778 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[9]/Be | | | | +Epoch 27 || rtIntermediate[inter | | | | +Epoch 27 || mediate]/NNCFLinear[ | | | | +Epoch 27 || dense]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 3072] | 0.899 | 2.778 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[9]/Be | | | | +Epoch 27 || rtOutput[output]/NNC | | | | +Epoch 27 || FLinear[dense]/linea | | | | +Epoch 27 || r_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[10]/B | | | | +Epoch 27 || ertAttention[attenti | | | | +Epoch 27 || on]/BertSelfAttentio | | | | +Epoch 27 || n[self]/NNCFLinear[q | | | | +Epoch 27 || uery]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[10]/B | | | | +Epoch 27 || ertAttention[attenti | | | | +Epoch 27 || on]/BertSelfAttentio | | | | +Epoch 27 || n[self]/NNCFLinear[k | | | | +Epoch 27 || ey]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.613 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[10]/B | | | | +Epoch 27 || ertAttention[attenti | | | | +Epoch 27 || on]/BertSelfAttentio | | | | +Epoch 27 || n[self]/NNCFLinear[v | | | | +Epoch 27 || alue]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.609 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[10]/B | | | | +Epoch 27 || ertAttention[attenti | | | | +Epoch 27 || on]/BertSelfOutput[o | | | | +Epoch 27 || utput]/NNCFLinear[de | | | | +Epoch 27 || nse]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[10]/B | | | | +Epoch 27 || ertIntermediate[inte | | | | +Epoch 27 || rmediate]/NNCFLinear | | | | +Epoch 27 || [dense]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 3072] | 0.896 | 2.778 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[10]/B | | | | +Epoch 27 || ertOutput[output]/NN | | | | +Epoch 27 || CFLinear[dense]/line | | | | +Epoch 27 || ar_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[11]/B | | | | +Epoch 27 || ertAttention[attenti | | | | +Epoch 27 || on]/BertSelfAttentio | | | | +Epoch 27 || n[self]/NNCFLinear[q | | | | +Epoch 27 || uery]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[11]/B | | | | +Epoch 27 || ertAttention[attenti | | | | +Epoch 27 || on]/BertSelfAttentio | | | | +Epoch 27 || n[self]/NNCFLinear[k | | | | +Epoch 27 || ey]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[11]/B | | | | +Epoch 27 || ertAttention[attenti | | | | +Epoch 27 || on]/BertSelfAttentio | | | | +Epoch 27 || n[self]/NNCFLinear[v | | | | +Epoch 27 || alue]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 768] | 0.598 | 0.694 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[11]/B | | | | +Epoch 27 || ertAttention[attenti | | | | +Epoch 27 || on]/BertSelfOutput[o | | | | +Epoch 27 || utput]/NNCFLinear[de | | | | +Epoch 27 || nse]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [3072, 768] | 0.889 | 2.778 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[11]/B | | | | +Epoch 27 || ertIntermediate[inte | | | | +Epoch 27 || rmediate]/NNCFLinear | | | | +Epoch 27 || [dense]/linear_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 || BertForSequenceClass | [768, 3072] | 0.890 | 2.778 | +Epoch 27 || ification/BertModel[ | | | | +Epoch 27 || bert]/BertEncoder[en | | | | +Epoch 27 || coder]/ModuleList[la | | | | +Epoch 27 || yer]/BertLayer[11]/B | | | | +Epoch 27 || ertOutput[output]/NN | | | | +Epoch 27 || CFLinear[dense]/line | | | | +Epoch 27 || ar_0 | | | | +Epoch 27 |+----------------------+----------------+----------------+---------------------+ +Epoch 27 | +Epoch 27 |Statistics of the magnitude sparsity algorithm: +Epoch 27 |+----------------------------------------------------------------------+-------+ +Epoch 27 || Statistic's name | Value | +Epoch 27 |+======================================================================+=======+ +Epoch 27 || A target level of the sparsity for the algorithm for the current | 0.800 | +Epoch 27 || epoch | | +Epoch 27 |+----------------------------------------------------------------------+-------+ +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || Layer's name | Sparsity threshold | +Epoch 27 |+=========================================================+====================+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 27 || linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 27 || near_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 27 || linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 27 || inear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 27 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 27 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 27 || linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 27 || near_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 27 || linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 27 || inear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 27 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 27 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 27 || linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 27 || near_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 27 || linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 27 || inear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 27 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 27 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 27 || linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 27 || near_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 27 || linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 27 || inear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 27 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 27 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 27 || linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 27 || near_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 27 || linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 27 || inear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 27 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 27 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 27 || linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 27 || near_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 27 || linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 27 || inear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 27 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 27 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 27 || linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 27 || near_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 27 || linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 27 || inear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 27 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 27 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 27 || linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 27 || near_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 27 || linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 27 || inear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 27 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 27 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 27 || linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 27 || near_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 27 || linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 27 || inear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 27 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 27 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 27 || linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 27 || near_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 27 || linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 27 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 27 || inear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 27 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 27 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 27 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 27 || /linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 27 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 27 || inear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 27 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 27 || /linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 27 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 27 || linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 27 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 27 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 27 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 27 || /linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 27 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 27 || inear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 27 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 27 || /linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 27 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 27 || linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 27 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +Epoch 27 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 27 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 27 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 27 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 28 |+--------------------------------+-------+ +Epoch 28 || Statistic's name | Value | +Epoch 28 |+================================+=======+ +Epoch 28 || Ratio of enabled quantizations | 100 | +Epoch 28 |+--------------------------------+-------+ +Epoch 28 | +Epoch 28 |Statistics of the quantization share: +Epoch 28 |+----------------------------------+--------------------+ +Epoch 28 || Statistic's name | Value | +Epoch 28 |+==================================+====================+ +Epoch 28 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 28 |+----------------------------------+--------------------+ +Epoch 28 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 28 |+----------------------------------+--------------------+ +Epoch 28 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 28 |+----------------------------------+--------------------+ +Epoch 28 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 28 |+----------------------------------+--------------------+ +Epoch 28 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 28 |+----------------------------------+--------------------+ +Epoch 28 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 28 |+----------------------------------+--------------------+ +Epoch 28 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 28 |+----------------------------------+--------------------+ +Epoch 28 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 28 |+----------------------------------+--------------------+ +Epoch 28 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 28 |+----------------------------------+--------------------+ +Epoch 28 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 28 |+----------------------------------+--------------------+ +Epoch 28 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 28 |+----------------------------------+--------------------+ +Epoch 28 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 28 |+----------------------------------+--------------------+ +Epoch 28 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 28 |+----------------------------------+--------------------+ +Epoch 28 | +Epoch 28 |Statistics of the bitwidth distribution: +Epoch 28 |+--------------+---------------------+--------------------+--------------------+ +Epoch 28 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 28 || | WQs | Placed AQs | Qs | +Epoch 28 |+==============+=====================+====================+====================+ +Epoch 28 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 28 || | | | 173) | +Epoch 28 |+--------------+---------------------+--------------------+--------------------+ +Epoch 28 | +Epoch 28 |Statistics of the sparsified model: +Epoch 28 |+-----------------------------------------+-------+ +Epoch 28 || Statistic's name | Value | +Epoch 28 |+=========================================+=======+ +Epoch 28 || Sparsity level of the whole model | 0.621 | +Epoch 28 |+-----------------------------------------+-------+ +Epoch 28 || Sparsity level of all sparsified layers | 0.800 | +Epoch 28 |+-----------------------------------------+-------+ +Epoch 28 | +Epoch 28 |Statistics by sparsified layers: +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 28 |+======================+================+================+=====================+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[0]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[qu | | | | +Epoch 28 || ery]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.622 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[0]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[ke | | | | +Epoch 28 || y]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.623 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[0]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[va | | | | +Epoch 28 || lue]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.644 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[0]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfOutput[ou | | | | +Epoch 28 || tput]/NNCFLinear[den | | | | +Epoch 28 || se]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[0]/Be | | | | +Epoch 28 || rtIntermediate[inter | | | | +Epoch 28 || mediate]/NNCFLinear[ | | | | +Epoch 28 || dense]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[0]/Be | | | | +Epoch 28 || rtOutput[output]/NNC | | | | +Epoch 28 || FLinear[dense]/linea | | | | +Epoch 28 || r_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[1]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[qu | | | | +Epoch 28 || ery]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.613 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[1]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[ke | | | | +Epoch 28 || y]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.625 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[1]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[va | | | | +Epoch 28 || lue]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.646 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[1]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfOutput[ou | | | | +Epoch 28 || tput]/NNCFLinear[den | | | | +Epoch 28 || se]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[1]/Be | | | | +Epoch 28 || rtIntermediate[inter | | | | +Epoch 28 || mediate]/NNCFLinear[ | | | | +Epoch 28 || dense]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[1]/Be | | | | +Epoch 28 || rtOutput[output]/NNC | | | | +Epoch 28 || FLinear[dense]/linea | | | | +Epoch 28 || r_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.625 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[2]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[qu | | | | +Epoch 28 || ery]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.626 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[2]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[ke | | | | +Epoch 28 || y]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.628 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[2]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[va | | | | +Epoch 28 || lue]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.640 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[2]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfOutput[ou | | | | +Epoch 28 || tput]/NNCFLinear[den | | | | +Epoch 28 || se]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [3072, 768] | 0.892 | 2.778 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[2]/Be | | | | +Epoch 28 || rtIntermediate[inter | | | | +Epoch 28 || mediate]/NNCFLinear[ | | | | +Epoch 28 || dense]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[2]/Be | | | | +Epoch 28 || rtOutput[output]/NNC | | | | +Epoch 28 || FLinear[dense]/linea | | | | +Epoch 28 || r_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.607 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[3]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[qu | | | | +Epoch 28 || ery]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[3]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[ke | | | | +Epoch 28 || y]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.624 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[3]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[va | | | | +Epoch 28 || lue]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.634 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[3]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfOutput[ou | | | | +Epoch 28 || tput]/NNCFLinear[den | | | | +Epoch 28 || se]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [3072, 768] | 0.892 | 2.778 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[3]/Be | | | | +Epoch 28 || rtIntermediate[inter | | | | +Epoch 28 || mediate]/NNCFLinear[ | | | | +Epoch 28 || dense]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 3072] | 0.899 | 2.778 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[3]/Be | | | | +Epoch 28 || rtOutput[output]/NNC | | | | +Epoch 28 || FLinear[dense]/linea | | | | +Epoch 28 || r_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[4]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[qu | | | | +Epoch 28 || ery]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[4]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[ke | | | | +Epoch 28 || y]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[4]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[va | | | | +Epoch 28 || lue]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.627 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[4]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfOutput[ou | | | | +Epoch 28 || tput]/NNCFLinear[den | | | | +Epoch 28 || se]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[4]/Be | | | | +Epoch 28 || rtIntermediate[inter | | | | +Epoch 28 || mediate]/NNCFLinear[ | | | | +Epoch 28 || dense]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[4]/Be | | | | +Epoch 28 || rtOutput[output]/NNC | | | | +Epoch 28 || FLinear[dense]/linea | | | | +Epoch 28 || r_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[5]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[qu | | | | +Epoch 28 || ery]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[5]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[ke | | | | +Epoch 28 || y]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.618 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[5]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[va | | | | +Epoch 28 || lue]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.626 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[5]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfOutput[ou | | | | +Epoch 28 || tput]/NNCFLinear[den | | | | +Epoch 28 || se]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[5]/Be | | | | +Epoch 28 || rtIntermediate[inter | | | | +Epoch 28 || mediate]/NNCFLinear[ | | | | +Epoch 28 || dense]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[5]/Be | | | | +Epoch 28 || rtOutput[output]/NNC | | | | +Epoch 28 || FLinear[dense]/linea | | | | +Epoch 28 || r_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[6]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[qu | | | | +Epoch 28 || ery]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.602 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[6]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[ke | | | | +Epoch 28 || y]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.618 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[6]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[va | | | | +Epoch 28 || lue]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.626 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[6]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfOutput[ou | | | | +Epoch 28 || tput]/NNCFLinear[den | | | | +Epoch 28 || se]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[6]/Be | | | | +Epoch 28 || rtIntermediate[inter | | | | +Epoch 28 || mediate]/NNCFLinear[ | | | | +Epoch 28 || dense]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 3072] | 0.896 | 2.778 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[6]/Be | | | | +Epoch 28 || rtOutput[output]/NNC | | | | +Epoch 28 || FLinear[dense]/linea | | | | +Epoch 28 || r_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[7]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[qu | | | | +Epoch 28 || ery]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[7]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[ke | | | | +Epoch 28 || y]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[7]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[va | | | | +Epoch 28 || lue]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[7]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfOutput[ou | | | | +Epoch 28 || tput]/NNCFLinear[den | | | | +Epoch 28 || se]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[7]/Be | | | | +Epoch 28 || rtIntermediate[inter | | | | +Epoch 28 || mediate]/NNCFLinear[ | | | | +Epoch 28 || dense]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 3072] | 0.894 | 2.778 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[7]/Be | | | | +Epoch 28 || rtOutput[output]/NNC | | | | +Epoch 28 || FLinear[dense]/linea | | | | +Epoch 28 || r_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[8]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[qu | | | | +Epoch 28 || ery]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[8]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[ke | | | | +Epoch 28 || y]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.606 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[8]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[va | | | | +Epoch 28 || lue]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[8]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfOutput[ou | | | | +Epoch 28 || tput]/NNCFLinear[den | | | | +Epoch 28 || se]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [3072, 768] | 0.890 | 2.778 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[8]/Be | | | | +Epoch 28 || rtIntermediate[inter | | | | +Epoch 28 || mediate]/NNCFLinear[ | | | | +Epoch 28 || dense]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 3072] | 0.894 | 2.778 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[8]/Be | | | | +Epoch 28 || rtOutput[output]/NNC | | | | +Epoch 28 || FLinear[dense]/linea | | | | +Epoch 28 || r_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[9]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[qu | | | | +Epoch 28 || ery]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.598 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[9]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[ke | | | | +Epoch 28 || y]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[9]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfAttention | | | | +Epoch 28 || [self]/NNCFLinear[va | | | | +Epoch 28 || lue]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.607 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[9]/Be | | | | +Epoch 28 || rtAttention[attentio | | | | +Epoch 28 || n]/BertSelfOutput[ou | | | | +Epoch 28 || tput]/NNCFLinear[den | | | | +Epoch 28 || se]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [3072, 768] | 0.894 | 2.778 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[9]/Be | | | | +Epoch 28 || rtIntermediate[inter | | | | +Epoch 28 || mediate]/NNCFLinear[ | | | | +Epoch 28 || dense]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 3072] | 0.899 | 2.778 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[9]/Be | | | | +Epoch 28 || rtOutput[output]/NNC | | | | +Epoch 28 || FLinear[dense]/linea | | | | +Epoch 28 || r_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[10]/B | | | | +Epoch 28 || ertAttention[attenti | | | | +Epoch 28 || on]/BertSelfAttentio | | | | +Epoch 28 || n[self]/NNCFLinear[q | | | | +Epoch 28 || uery]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[10]/B | | | | +Epoch 28 || ertAttention[attenti | | | | +Epoch 28 || on]/BertSelfAttentio | | | | +Epoch 28 || n[self]/NNCFLinear[k | | | | +Epoch 28 || ey]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.613 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[10]/B | | | | +Epoch 28 || ertAttention[attenti | | | | +Epoch 28 || on]/BertSelfAttentio | | | | +Epoch 28 || n[self]/NNCFLinear[v | | | | +Epoch 28 || alue]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.609 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[10]/B | | | | +Epoch 28 || ertAttention[attenti | | | | +Epoch 28 || on]/BertSelfOutput[o | | | | +Epoch 28 || utput]/NNCFLinear[de | | | | +Epoch 28 || nse]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[10]/B | | | | +Epoch 28 || ertIntermediate[inte | | | | +Epoch 28 || rmediate]/NNCFLinear | | | | +Epoch 28 || [dense]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 3072] | 0.896 | 2.778 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[10]/B | | | | +Epoch 28 || ertOutput[output]/NN | | | | +Epoch 28 || CFLinear[dense]/line | | | | +Epoch 28 || ar_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[11]/B | | | | +Epoch 28 || ertAttention[attenti | | | | +Epoch 28 || on]/BertSelfAttentio | | | | +Epoch 28 || n[self]/NNCFLinear[q | | | | +Epoch 28 || uery]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[11]/B | | | | +Epoch 28 || ertAttention[attenti | | | | +Epoch 28 || on]/BertSelfAttentio | | | | +Epoch 28 || n[self]/NNCFLinear[k | | | | +Epoch 28 || ey]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[11]/B | | | | +Epoch 28 || ertAttention[attenti | | | | +Epoch 28 || on]/BertSelfAttentio | | | | +Epoch 28 || n[self]/NNCFLinear[v | | | | +Epoch 28 || alue]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 768] | 0.598 | 0.694 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[11]/B | | | | +Epoch 28 || ertAttention[attenti | | | | +Epoch 28 || on]/BertSelfOutput[o | | | | +Epoch 28 || utput]/NNCFLinear[de | | | | +Epoch 28 || nse]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [3072, 768] | 0.889 | 2.778 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[11]/B | | | | +Epoch 28 || ertIntermediate[inte | | | | +Epoch 28 || rmediate]/NNCFLinear | | | | +Epoch 28 || [dense]/linear_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 || BertForSequenceClass | [768, 3072] | 0.890 | 2.778 | +Epoch 28 || ification/BertModel[ | | | | +Epoch 28 || bert]/BertEncoder[en | | | | +Epoch 28 || coder]/ModuleList[la | | | | +Epoch 28 || yer]/BertLayer[11]/B | | | | +Epoch 28 || ertOutput[output]/NN | | | | +Epoch 28 || CFLinear[dense]/line | | | | +Epoch 28 || ar_0 | | | | +Epoch 28 |+----------------------+----------------+----------------+---------------------+ +Epoch 28 | +Epoch 28 |Statistics of the magnitude sparsity algorithm: +Epoch 28 |+----------------------------------------------------------------------+-------+ +Epoch 28 || Statistic's name | Value | +Epoch 28 |+======================================================================+=======+ +Epoch 28 || A target level of the sparsity for the algorithm for the current | 0.800 | +Epoch 28 || epoch | | +Epoch 28 |+----------------------------------------------------------------------+-------+ +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || Layer's name | Sparsity threshold | +Epoch 28 |+=========================================================+====================+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 28 || linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 28 || near_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 28 || linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 28 || inear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 28 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 28 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 28 || linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 28 || near_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 28 || linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 28 || inear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 28 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 28 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 28 || linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 28 || near_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 28 || linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 28 || inear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 28 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 28 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 28 || linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 28 || near_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 28 || linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 28 || inear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 28 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 28 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 28 || linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 28 || near_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 28 || linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 28 || inear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 28 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 28 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 28 || linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 28 || near_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 28 || linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 28 || inear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 28 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 28 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 28 || linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 28 || near_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 28 || linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 28 || inear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 28 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 28 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 28 || linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 28 || near_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 28 || linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 28 || inear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 28 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 28 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 28 || linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 28 || near_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 28 || linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 28 || inear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 28 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 28 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 28 || linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 28 || near_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 28 || linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 28 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 28 || inear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 28 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 28 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 28 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 28 || /linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 28 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 28 || inear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 28 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 28 || /linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 28 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 28 || linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 28 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 28 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 28 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 28 || /linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 28 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 28 || inear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 28 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 28 || /linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 28 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 28 || linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 28 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +Epoch 28 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 28 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 28 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 28 |+---------------------------------------------------------+--------------------+ +INFO:nncf:Statistics of the quantization algorithm: +Epoch 29 |+--------------------------------+-------+ +Epoch 29 || Statistic's name | Value | +Epoch 29 |+================================+=======+ +Epoch 29 || Ratio of enabled quantizations | 100 | +Epoch 29 |+--------------------------------+-------+ +Epoch 29 | +Epoch 29 |Statistics of the quantization share: +Epoch 29 |+----------------------------------+--------------------+ +Epoch 29 || Statistic's name | Value | +Epoch 29 |+==================================+====================+ +Epoch 29 || Symmetric WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 29 |+----------------------------------+--------------------+ +Epoch 29 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 29 |+----------------------------------+--------------------+ +Epoch 29 || Signed WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 29 |+----------------------------------+--------------------+ +Epoch 29 || Unsigned WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 29 |+----------------------------------+--------------------+ +Epoch 29 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 74) | +Epoch 29 |+----------------------------------+--------------------+ +Epoch 29 || Per-channel WQs / All placed WQs | 100.00 % (74 / 74) | +Epoch 29 |+----------------------------------+--------------------+ +Epoch 29 || Placed WQs / Potential WQs | 72.55 % (74 / 102) | +Epoch 29 |+----------------------------------+--------------------+ +Epoch 29 || Symmetric AQs / All placed AQs | 24.24 % (24 / 99) | +Epoch 29 |+----------------------------------+--------------------+ +Epoch 29 || Asymmetric AQs / All placed AQs | 75.76 % (75 / 99) | +Epoch 29 |+----------------------------------+--------------------+ +Epoch 29 || Signed AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 29 |+----------------------------------+--------------------+ +Epoch 29 || Unsigned AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 29 |+----------------------------------+--------------------+ +Epoch 29 || Per-tensor AQs / All placed AQs | 100.00 % (99 / 99) | +Epoch 29 |+----------------------------------+--------------------+ +Epoch 29 || Per-channel AQs / All placed AQs | 0.00 % (0 / 99) | +Epoch 29 |+----------------------------------+--------------------+ +Epoch 29 | +Epoch 29 |Statistics of the bitwidth distribution: +Epoch 29 |+--------------+---------------------+--------------------+--------------------+ +Epoch 29 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed | +Epoch 29 || | WQs | Placed AQs | Qs | +Epoch 29 |+==============+=====================+====================+====================+ +Epoch 29 || 8 | 100.00 % (74 / 74) | 100.00 % (99 / 99) | 100.00 % (173 / | +Epoch 29 || | | | 173) | +Epoch 29 |+--------------+---------------------+--------------------+--------------------+ +Epoch 29 | +Epoch 29 |Statistics of the sparsified model: +Epoch 29 |+-----------------------------------------+-------+ +Epoch 29 || Statistic's name | Value | +Epoch 29 |+=========================================+=======+ +Epoch 29 || Sparsity level of the whole model | 0.621 | +Epoch 29 |+-----------------------------------------+-------+ +Epoch 29 || Sparsity level of all sparsified layers | 0.800 | +Epoch 29 |+-----------------------------------------+-------+ +Epoch 29 | +Epoch 29 |Statistics by sparsified layers: +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || Layer's name | Weight's shape | Sparsity level | Weight's percentage | +Epoch 29 |+======================+================+================+=====================+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[0]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[qu | | | | +Epoch 29 || ery]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.622 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[0]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[ke | | | | +Epoch 29 || y]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.623 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[0]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[va | | | | +Epoch 29 || lue]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.644 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[0]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfOutput[ou | | | | +Epoch 29 || tput]/NNCFLinear[den | | | | +Epoch 29 || se]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[0]/Be | | | | +Epoch 29 || rtIntermediate[inter | | | | +Epoch 29 || mediate]/NNCFLinear[ | | | | +Epoch 29 || dense]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[0]/Be | | | | +Epoch 29 || rtOutput[output]/NNC | | | | +Epoch 29 || FLinear[dense]/linea | | | | +Epoch 29 || r_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[1]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[qu | | | | +Epoch 29 || ery]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.613 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[1]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[ke | | | | +Epoch 29 || y]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.625 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[1]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[va | | | | +Epoch 29 || lue]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.646 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[1]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfOutput[ou | | | | +Epoch 29 || tput]/NNCFLinear[den | | | | +Epoch 29 || se]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[1]/Be | | | | +Epoch 29 || rtIntermediate[inter | | | | +Epoch 29 || mediate]/NNCFLinear[ | | | | +Epoch 29 || dense]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[1]/Be | | | | +Epoch 29 || rtOutput[output]/NNC | | | | +Epoch 29 || FLinear[dense]/linea | | | | +Epoch 29 || r_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.625 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[2]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[qu | | | | +Epoch 29 || ery]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.626 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[2]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[ke | | | | +Epoch 29 || y]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.628 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[2]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[va | | | | +Epoch 29 || lue]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.640 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[2]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfOutput[ou | | | | +Epoch 29 || tput]/NNCFLinear[den | | | | +Epoch 29 || se]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [3072, 768] | 0.892 | 2.778 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[2]/Be | | | | +Epoch 29 || rtIntermediate[inter | | | | +Epoch 29 || mediate]/NNCFLinear[ | | | | +Epoch 29 || dense]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[2]/Be | | | | +Epoch 29 || rtOutput[output]/NNC | | | | +Epoch 29 || FLinear[dense]/linea | | | | +Epoch 29 || r_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.607 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[3]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[qu | | | | +Epoch 29 || ery]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.610 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[3]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[ke | | | | +Epoch 29 || y]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.624 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[3]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[va | | | | +Epoch 29 || lue]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.634 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[3]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfOutput[ou | | | | +Epoch 29 || tput]/NNCFLinear[den | | | | +Epoch 29 || se]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [3072, 768] | 0.892 | 2.778 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[3]/Be | | | | +Epoch 29 || rtIntermediate[inter | | | | +Epoch 29 || mediate]/NNCFLinear[ | | | | +Epoch 29 || dense]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 3072] | 0.899 | 2.778 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[3]/Be | | | | +Epoch 29 || rtOutput[output]/NNC | | | | +Epoch 29 || FLinear[dense]/linea | | | | +Epoch 29 || r_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[4]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[qu | | | | +Epoch 29 || ery]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[4]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[ke | | | | +Epoch 29 || y]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[4]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[va | | | | +Epoch 29 || lue]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.627 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[4]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfOutput[ou | | | | +Epoch 29 || tput]/NNCFLinear[den | | | | +Epoch 29 || se]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[4]/Be | | | | +Epoch 29 || rtIntermediate[inter | | | | +Epoch 29 || mediate]/NNCFLinear[ | | | | +Epoch 29 || dense]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 3072] | 0.898 | 2.778 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[4]/Be | | | | +Epoch 29 || rtOutput[output]/NNC | | | | +Epoch 29 || FLinear[dense]/linea | | | | +Epoch 29 || r_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.603 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[5]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[qu | | | | +Epoch 29 || ery]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.605 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[5]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[ke | | | | +Epoch 29 || y]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.618 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[5]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[va | | | | +Epoch 29 || lue]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.626 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[5]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfOutput[ou | | | | +Epoch 29 || tput]/NNCFLinear[den | | | | +Epoch 29 || se]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[5]/Be | | | | +Epoch 29 || rtIntermediate[inter | | | | +Epoch 29 || mediate]/NNCFLinear[ | | | | +Epoch 29 || dense]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 3072] | 0.897 | 2.778 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[5]/Be | | | | +Epoch 29 || rtOutput[output]/NNC | | | | +Epoch 29 || FLinear[dense]/linea | | | | +Epoch 29 || r_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[6]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[qu | | | | +Epoch 29 || ery]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.602 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[6]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[ke | | | | +Epoch 29 || y]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.618 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[6]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[va | | | | +Epoch 29 || lue]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.626 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[6]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfOutput[ou | | | | +Epoch 29 || tput]/NNCFLinear[den | | | | +Epoch 29 || se]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[6]/Be | | | | +Epoch 29 || rtIntermediate[inter | | | | +Epoch 29 || mediate]/NNCFLinear[ | | | | +Epoch 29 || dense]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 3072] | 0.896 | 2.778 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[6]/Be | | | | +Epoch 29 || rtOutput[output]/NNC | | | | +Epoch 29 || FLinear[dense]/linea | | | | +Epoch 29 || r_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[7]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[qu | | | | +Epoch 29 || ery]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.601 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[7]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[ke | | | | +Epoch 29 || y]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.614 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[7]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[va | | | | +Epoch 29 || lue]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.620 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[7]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfOutput[ou | | | | +Epoch 29 || tput]/NNCFLinear[den | | | | +Epoch 29 || se]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[7]/Be | | | | +Epoch 29 || rtIntermediate[inter | | | | +Epoch 29 || mediate]/NNCFLinear[ | | | | +Epoch 29 || dense]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 3072] | 0.894 | 2.778 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[7]/Be | | | | +Epoch 29 || rtOutput[output]/NNC | | | | +Epoch 29 || FLinear[dense]/linea | | | | +Epoch 29 || r_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[8]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[qu | | | | +Epoch 29 || ery]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[8]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[ke | | | | +Epoch 29 || y]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.606 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[8]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[va | | | | +Epoch 29 || lue]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.612 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[8]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfOutput[ou | | | | +Epoch 29 || tput]/NNCFLinear[den | | | | +Epoch 29 || se]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [3072, 768] | 0.890 | 2.778 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[8]/Be | | | | +Epoch 29 || rtIntermediate[inter | | | | +Epoch 29 || mediate]/NNCFLinear[ | | | | +Epoch 29 || dense]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 3072] | 0.894 | 2.778 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[8]/Be | | | | +Epoch 29 || rtOutput[output]/NNC | | | | +Epoch 29 || FLinear[dense]/linea | | | | +Epoch 29 || r_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.595 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[9]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[qu | | | | +Epoch 29 || ery]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.598 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[9]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[ke | | | | +Epoch 29 || y]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.604 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[9]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfAttention | | | | +Epoch 29 || [self]/NNCFLinear[va | | | | +Epoch 29 || lue]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.607 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[9]/Be | | | | +Epoch 29 || rtAttention[attentio | | | | +Epoch 29 || n]/BertSelfOutput[ou | | | | +Epoch 29 || tput]/NNCFLinear[den | | | | +Epoch 29 || se]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [3072, 768] | 0.894 | 2.778 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[9]/Be | | | | +Epoch 29 || rtIntermediate[inter | | | | +Epoch 29 || mediate]/NNCFLinear[ | | | | +Epoch 29 || dense]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 3072] | 0.899 | 2.778 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[9]/Be | | | | +Epoch 29 || rtOutput[output]/NNC | | | | +Epoch 29 || FLinear[dense]/linea | | | | +Epoch 29 || r_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[10]/B | | | | +Epoch 29 || ertAttention[attenti | | | | +Epoch 29 || on]/BertSelfAttentio | | | | +Epoch 29 || n[self]/NNCFLinear[q | | | | +Epoch 29 || uery]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.597 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[10]/B | | | | +Epoch 29 || ertAttention[attenti | | | | +Epoch 29 || on]/BertSelfAttentio | | | | +Epoch 29 || n[self]/NNCFLinear[k | | | | +Epoch 29 || ey]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.613 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[10]/B | | | | +Epoch 29 || ertAttention[attenti | | | | +Epoch 29 || on]/BertSelfAttentio | | | | +Epoch 29 || n[self]/NNCFLinear[v | | | | +Epoch 29 || alue]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.609 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[10]/B | | | | +Epoch 29 || ertAttention[attenti | | | | +Epoch 29 || on]/BertSelfOutput[o | | | | +Epoch 29 || utput]/NNCFLinear[de | | | | +Epoch 29 || nse]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [3072, 768] | 0.891 | 2.778 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[10]/B | | | | +Epoch 29 || ertIntermediate[inte | | | | +Epoch 29 || rmediate]/NNCFLinear | | | | +Epoch 29 || [dense]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 3072] | 0.896 | 2.778 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[10]/B | | | | +Epoch 29 || ertOutput[output]/NN | | | | +Epoch 29 || CFLinear[dense]/line | | | | +Epoch 29 || ar_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.599 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[11]/B | | | | +Epoch 29 || ertAttention[attenti | | | | +Epoch 29 || on]/BertSelfAttentio | | | | +Epoch 29 || n[self]/NNCFLinear[q | | | | +Epoch 29 || uery]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.596 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[11]/B | | | | +Epoch 29 || ertAttention[attenti | | | | +Epoch 29 || on]/BertSelfAttentio | | | | +Epoch 29 || n[self]/NNCFLinear[k | | | | +Epoch 29 || ey]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.600 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[11]/B | | | | +Epoch 29 || ertAttention[attenti | | | | +Epoch 29 || on]/BertSelfAttentio | | | | +Epoch 29 || n[self]/NNCFLinear[v | | | | +Epoch 29 || alue]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 768] | 0.598 | 0.694 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[11]/B | | | | +Epoch 29 || ertAttention[attenti | | | | +Epoch 29 || on]/BertSelfOutput[o | | | | +Epoch 29 || utput]/NNCFLinear[de | | | | +Epoch 29 || nse]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [3072, 768] | 0.889 | 2.778 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[11]/B | | | | +Epoch 29 || ertIntermediate[inte | | | | +Epoch 29 || rmediate]/NNCFLinear | | | | +Epoch 29 || [dense]/linear_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 || BertForSequenceClass | [768, 3072] | 0.890 | 2.778 | +Epoch 29 || ification/BertModel[ | | | | +Epoch 29 || bert]/BertEncoder[en | | | | +Epoch 29 || coder]/ModuleList[la | | | | +Epoch 29 || yer]/BertLayer[11]/B | | | | +Epoch 29 || ertOutput[output]/NN | | | | +Epoch 29 || CFLinear[dense]/line | | | | +Epoch 29 || ar_0 | | | | +Epoch 29 |+----------------------+----------------+----------------+---------------------+ +Epoch 29 | +Epoch 29 |Statistics of the magnitude sparsity algorithm: +Epoch 29 |+----------------------------------------------------------------------+-------+ +Epoch 29 || Statistic's name | Value | +Epoch 29 |+======================================================================+=======+ +Epoch 29 || A target level of the sparsity for the algorithm for the current | 0.800 | +Epoch 29 || epoch | | +Epoch 29 |+----------------------------------------------------------------------+-------+ +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || Layer's name | Sparsity threshold | +Epoch 29 |+=========================================================+====================+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 29 || linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 29 || near_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 29 || linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 29 || inear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertIntermed | | +Epoch 29 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[0]/BertOutput[o | | +Epoch 29 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 29 || linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 29 || near_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 29 || linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 29 || inear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertIntermed | | +Epoch 29 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[1]/BertOutput[o | | +Epoch 29 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 29 || linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 29 || near_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 29 || linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 29 || inear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertIntermed | | +Epoch 29 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[2]/BertOutput[o | | +Epoch 29 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 29 || linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 29 || near_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 29 || linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 29 || inear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertIntermed | | +Epoch 29 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[3]/BertOutput[o | | +Epoch 29 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 29 || linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 29 || near_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 29 || linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 29 || inear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertIntermed | | +Epoch 29 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[4]/BertOutput[o | | +Epoch 29 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 29 || linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 29 || near_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 29 || linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 29 || inear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertIntermed | | +Epoch 29 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[5]/BertOutput[o | | +Epoch 29 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 29 || linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 29 || near_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 29 || linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 29 || inear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertIntermed | | +Epoch 29 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[6]/BertOutput[o | | +Epoch 29 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 29 || linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 29 || near_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 29 || linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 29 || inear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertIntermed | | +Epoch 29 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[7]/BertOutput[o | | +Epoch 29 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 29 || linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 29 || near_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 29 || linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 29 || inear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertIntermed | | +Epoch 29 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[8]/BertOutput[o | | +Epoch 29 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[query]/ | | +Epoch 29 || linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[key]/li | | +Epoch 29 || near_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfAttention[self]/NNCFLinear[value]/ | | +Epoch 29 || linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertAttentio | | +Epoch 29 || n[attention]/BertSelfOutput[output]/NNCFLinear[dense]/l | | +Epoch 29 || inear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertIntermed | | +Epoch 29 || iate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[9]/BertOutput[o | | +Epoch 29 || utput]/NNCFLinear[dense]/linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 29 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 29 || /linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 29 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 29 || inear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 29 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 29 || /linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertAttenti | | +Epoch 29 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 29 || linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertInterme | | +Epoch 29 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[10]/BertOutput[ | | +Epoch 29 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 29 || on[attention]/BertSelfAttention[self]/NNCFLinear[query] | | +Epoch 29 || /linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 29 || on[attention]/BertSelfAttention[self]/NNCFLinear[key]/l | | +Epoch 29 || inear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 29 || on[attention]/BertSelfAttention[self]/NNCFLinear[value] | | +Epoch 29 || /linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertAttenti | | +Epoch 29 || on[attention]/BertSelfOutput[output]/NNCFLinear[dense]/ | | +Epoch 29 || linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertInterme | | +Epoch 29 || diate[intermediate]/NNCFLinear[dense]/linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+ +Epoch 29 || BertForSequenceClassification/BertModel[bert]/BertEncod | 0.001 | +Epoch 29 || er[encoder]/ModuleList[layer]/BertLayer[11]/BertOutput[ | | +Epoch 29 || output]/NNCFLinear[dense]/linear_0 | | +Epoch 29 |+---------------------------------------------------------+--------------------+