update model

Files changed (6) hide show

README.md +12 -6
config.json +1 -1
config_sentence_transformers.json +1 -1
pytorch_model.bin +2 -2
sentence_bert_config.json +1 -1
tokenizer.json +1 -1

README.md CHANGED Viewed

@@ -21,6 +21,12 @@ This model was adapted from [ytu-ce-cosmos/turkish-tiny-bert-uncased](https://hu
 - [nli_tr](https://huggingface.co/datasets/nli_tr)
 - [emrecan/stsb-mt-turkish](https://huggingface.co/datasets/emrecan/stsb-mt-turkish)
 ## Usage (Sentence-Transformers)
 Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:
@@ -85,10 +91,10 @@ print(sentence_embeddings)
 Achieved results on the [STS-b](https://huggingface.co/datasets/emrecan/stsb-mt-turkish) test split are given below:
 ```txt
-Cosine-Similarity :       Pearson: 0.6587 Spearman: 0.6370
-Manhattan-Distance:       Pearson: 0.6293 Spearman: 0.6151
-Euclidean-Distance:       Pearson: 0.6335 Spearman: 0.6186
-Dot-Product-Similarity:   Pearson: 0.5972 Spearman: 0.5756
 ```
@@ -110,7 +116,7 @@ Parameters of the fit()-Method:
 ```
 {
     "epochs": 10,
-    "evaluation_steps": 22,
     "evaluator": "sentence_transformers.evaluation.EmbeddingSimilarityEvaluator.EmbeddingSimilarityEvaluator",
     "max_grad_norm": 1,
     "optimizer_class": "<class 'torch.optim.adamw.AdamW'>",
@@ -128,7 +134,7 @@ Parameters of the fit()-Method:
 ## Full Model Architecture
 ```
 SentenceTransformer(
-  (0): Transformer({'max_seq_length': 75, 'do_lower_case': False}) with Transformer model: BertModel
   (1): Pooling({'word_embedding_dimension': 128, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
 )
 ```

 - [nli_tr](https://huggingface.co/datasets/nli_tr)
 - [emrecan/stsb-mt-turkish](https://huggingface.co/datasets/emrecan/stsb-mt-turkish)
+:warning: **All texts were manually lowercased**, [as required](https://huggingface.co/ytu-ce-cosmos/turkish-tiny-bert-uncased#%E2%9A%A0-uncased-use-requires-manual-lowercase-conversion) by the base model's authors:
+```python
+text.replace("I", "ı").lower()
+```
 ## Usage (Sentence-Transformers)
 Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:
 Achieved results on the [STS-b](https://huggingface.co/datasets/emrecan/stsb-mt-turkish) test split are given below:
 ```txt
+Cosine-Similarity :       Pearson: 0.7515 Spearman: 0.7467
+Manhattan-Distance:       Pearson: 0.7404 Spearman: 0.7299
+Euclidean-Distance:       Pearson: 0.7415 Spearman: 0.7305
+Dot-Product-Similarity:   Pearson: 0.6395 Spearman: 0.6140
 ```
 ```
 {
     "epochs": 10,
+    "evaluation_steps": 574,
     "evaluator": "sentence_transformers.evaluation.EmbeddingSimilarityEvaluator.EmbeddingSimilarityEvaluator",
     "max_grad_norm": 1,
     "optimizer_class": "<class 'torch.optim.adamw.AdamW'>",
 ## Full Model Architecture
 ```
 SentenceTransformer(
+  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel
   (1): Pooling({'word_embedding_dimension': 128, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
 )
 ```

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "output/training_nli_v2_ytu-ce-cosmos-turkish-tiny-bert-uncased-2024-02-14_22-23-39\\",
   "architectures": [
     "BertModel"
   ],

 {
+  "_name_or_path": "output/ytu_ce_cosmos-turkish_tiny_bert_uncased-b128-e10-nli/",
   "architectures": [
     "BertModel"
   ],

config_sentence_transformers.json CHANGED Viewed

@@ -2,6 +2,6 @@
   "__version__": {
     "sentence_transformers": "2.2.2",
     "transformers": "4.28.0",
-    "pytorch": "2.0.1+cu118"
   }
 }

   "__version__": {
     "sentence_transformers": "2.2.2",
     "transformers": "4.28.0",
+    "pytorch": "2.1.0+cu121"
   }
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d1d69f5f19045cb2acaf5ce6bc758eb9ef0bf19bfda219104c995f90a416595
-size 18317545

 version https://git-lfs.github.com/spec/v1
+oid sha256:f0392c2975e017cc3bf5d50e0ecb956eb9b7d99a1ac7a0b1d3b002e5167adb5f
+size 18317990

sentence_bert_config.json CHANGED Viewed

@@ -1,4 +1,4 @@
 {
-  "max_seq_length": 75,
   "do_lower_case": false
 }

 {
+  "max_seq_length": 256,
   "do_lower_case": false
 }

tokenizer.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 75,
     "strategy": "LongestFirst",
     "stride": 0
   },

   "version": "1.0",
   "truncation": {
     "direction": "Right",
+    "max_length": 256,
     "strategy": "LongestFirst",
     "stride": 0
   },