utrobinmv commited on
Commit
b070fd8
·
1 Parent(s): 8d973fa

add sentence

Browse files
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
2_Dense/config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"in_features": 768, "out_features": 768, "bias": false, "activation_function": "torch.nn.modules.activation.Tanh"}
2_Dense/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c2d8106dd315cd2a0b39459de220455a21d289b1f4cb1eb9c364dde3570c9c6
3
+ size 1179736
README.md ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ - ru
5
+ - zh
6
+ tags:
7
+ - sentence-transformers
8
+ - feature-extraction
9
+ - sentence-similarity
10
+ - text2text-generation
11
+ - t5
12
+ base_model:
13
+ - utrobinmv/t5_translate_en_ru_zh_base_200
14
+ license: apache-2.0
15
+ widget:
16
+ - example_title: translate zh-ru
17
+ text: >
18
+ translate to ru: 开发的目的是为用户提供个人同步翻译。
19
+ - example_title: translate ru-en
20
+ text: >
21
+ translate to en: Цель разработки — предоставить пользователям личного синхронного переводчика.
22
+ - example_title: translate en-ru
23
+ text: >
24
+ translate to ru: The purpose of the development is to provide users with a personal synchronized interpreter.
25
+ - example_title: translate en-zh
26
+ text: >
27
+ translate to zh: The purpose of the development is to provide users with a personal synchronized interpreter.
28
+ - example_title: translate zh-en
29
+ text: >
30
+ translate to en: 开发的目的是为用户提供个人同步解释器。
31
+ - example_title: translate ru-zh
32
+ text: >
33
+ translate to zh: Цель разработки — предоставить пользователям личного синхронного переводчика.
34
+ ---
35
+
36
+ # T5 English, Russian and Chinese multilingual machine translation
37
+
38
+ This is a [sentence-transformers](https://www.sbert.net/) model: it maps sentences and paragraphs to a 768-dimensional dense vector space. The model works well for sentence similarity tasks, but does not perform as well for semantic search tasks.
39
+
40
+ The model uses only the encoder from a T5-base model.
41
+
42
+
43
+ ## Usage (Sentence-Transformers)
44
+
45
+ Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:
46
+
47
+ ```
48
+ pip install -U sentence-transformers
49
+ ```
50
+
51
+ Then you can use the model like this:
52
+
53
+ ```python
54
+ from sentence_transformers import SentenceTransformer
55
+ sentences = ["This is an example sentence", "Each sentence is converted"]
56
+ model = SentenceTransformer('sentence-transformers/sentence-t5-base')
57
+ embeddings = model.encode(sentences)
58
+ print(embeddings)
59
+ ```
60
+
61
+
62
+
63
+ Example: translating Russian to Chinese
64
+
65
+ ```python
66
+ from transformers import T5ForConditionalGeneration, T5Tokenizer
67
+
68
+ device = 'cuda' #or 'cpu' for translate on cpu
69
+
70
+ model_name = 'utrobinmv/t5_translate_en_ru_zh_large_1024'
71
+ model = T5ForConditionalGeneration.from_pretrained(model_name)
72
+ model.to(device)
73
+ tokenizer = T5Tokenizer.from_pretrained(model_name)
74
+
75
+ prefix = 'translate to zh: '
76
+ src_text = prefix + "Съешь ещё этих мягких французских булок."
77
+
78
+ # translate Russian to Chinese
79
+ input_ids = tokenizer(src_text, return_tensors="pt")
80
+
81
+ generated_tokens = model.generate(**input_ids.to(device))
82
+
83
+ result = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
84
+ print(result)
85
+ # 再吃这些法国的甜蜜的面包。
86
+ ```
87
+
88
+
89
+
90
+ Example: translating Chinese to Russian
91
+
92
+ ```python
93
+ from transformers import T5ForConditionalGeneration, T5Tokenizer
94
+
95
+ device = 'cuda' #or 'cpu' for translate on cpu
96
+
97
+ model_name = 'utrobinmv/t5_translate_en_ru_zh_large_1024'
98
+ model = T5ForConditionalGeneration.from_pretrained(model_name)
99
+ model.to(device)
100
+ tokenizer = T5Tokenizer.from_pretrained(model_name)
101
+
102
+ prefix = 'translate to ru: '
103
+ src_text = prefix + "再吃这些法国的甜蜜的面包。"
104
+
105
+ # translate Chinese to Russian
106
+ input_ids = tokenizer(src_text, return_tensors="pt")
107
+
108
+ generated_tokens = model.generate(**input_ids.to(device))
109
+
110
+ result = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
111
+ print(result)
112
+ # Съешьте этот сладкий хлеб из Франции.
113
+ ```
114
+
115
+
116
+
117
+
118
+
119
+
120
+
121
+ ## Languages covered
122
+
123
+ Russian (ru_RU), Chinese (zh_CN), English (en_US)
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.0.1",
4
+ "transformers": "4.38.2",
5
+ "pytorch": "2.2.1+cu121"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": "cos_sim"
10
+ }
modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})",
5
+ "path": "1_Pooling",
6
+ "type": "models.Pooling"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "Dense({'in_features': 768, 'out_features': 768, 'bias': False, 'activation_function': 'torch.nn.modules.activation.Tanh'})",
11
+ "path": "2_Dense",
12
+ "type": "models.Dense"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "Normalize()",
17
+ "path": "3_Normalize",
18
+ "type": "models.Normalize"
19
+ }
20
+ ]