PrompTart committed
Commit 06b7708 · verified · 1 Parent(s): a0a8e19

Update README.md

Files changed (1)
README.md +5 -4
README.md CHANGED
@@ -30,14 +30,15 @@ tokenizer = M2M100Tokenizer.from_pretrained(model_name)
  model = M2M100ForConditionalGeneration.from_pretrained(model_name)

  # Example sentence
- text = "The model was fine-tuned using knowledge distillation techniques."
+ text = "The model was fine-tuned using knowledge distillation techniques. The training dataset was created using a collaborative multi-agent framework powered by large language models."

  # Tokenize and generate translation
  tokenizer.src_lang = "en"
- encoded = tokenizer(text, return_tensors="pt")
+ encoded = tokenizer(text.split('. '), return_tensors="pt", padding=True)
  generated_tokens = model.generate(**encoded, forced_bos_token_id=tokenizer.get_lang_id("ko"))
- tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
- # => "이 모델은 지식 증류 기법(knowledge distillation techniques)을 사용하여 미세 조정되었습니다."
+ outputs = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
+ print(' '.join(outputs))
+ # => "이 모델은 지식 증류 기법(knowledge distillation techniques)을 사용하여 미세 조정되었습니다. 훈련 데이터셋(training dataset)은 대형 언어 모델(large language models)을 기반으로 한 협업 다중 에이전트 프레임워크(collaborative multi-agent framework)를 사용하여 생성되었습니다."

  ```
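
For reference, here is a minimal sketch of the snippet as it reads after this change. The imports and the `model_name` assignment sit above the hunk and are not shown in this diff, so the lines below that supply them (including the placeholder repository id) are assumptions, not values taken from the repo:

```python
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer

# Hypothetical placeholder: substitute the actual repository id of this model.
model_name = "PrompTart/your-m2m100-finetune"

tokenizer = M2M100Tokenizer.from_pretrained(model_name)
model = M2M100ForConditionalGeneration.from_pretrained(model_name)

# Example sentence
text = "The model was fine-tuned using knowledge distillation techniques. The training dataset was created using a collaborative multi-agent framework powered by large language models."

# Tokenize and generate translation; splitting on '. ' produces a batch of
# sentences, so padding=True is needed to pad them to a common length.
tokenizer.src_lang = "en"
encoded = tokenizer(text.split('. '), return_tensors="pt", padding=True)
generated_tokens = model.generate(**encoded, forced_bos_token_id=tokenizer.get_lang_id("ko"))
outputs = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
print(' '.join(outputs))
```

Splitting on '. ' drops the period from all but the last sentence, which keeps the example short; a dedicated sentence splitter would preserve punctuation.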