Update README.md
Browse files
README.md
CHANGED
@@ -30,7 +30,9 @@ from transformers import AutoModel, AutoTokenizer
|
|
30 |
checkpoint = "codesage/codesage-base"
|
31 |
device = "cuda" # for GPU usage or "cpu" for CPU usage
|
32 |
|
33 |
-
|
|
|
|
|
34 |
model = AutoModel.from_pretrained(checkpoint, trust_remote_code=True).to(device)
|
35 |
|
36 |
inputs = tokenizer.encode("def print_hello_world():\tprint('Hello World!')", return_tensors="pt").to(device)
|
|
|
30 |
checkpoint = "codesage/codesage-base"
|
31 |
device = "cuda" # for GPU usage or "cpu" for CPU usage
|
32 |
|
33 |
+
# CodeSage requires an EOS token to be appended to the end of each tokenized sequence to ensure good performance
|
34 |
+
tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True, add_eos_token=True)
|
35 |
+
|
36 |
model = AutoModel.from_pretrained(checkpoint, trust_remote_code=True).to(device)
|
37 |
|
38 |
inputs = tokenizer.encode("def print_hello_world():\tprint('Hello World!')", return_tensors="pt").to(device)
|