syzymon committed on
Commit
8805cbd
1 Parent(s): caf6d75

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +7 -7
README.md CHANGED
@@ -63,7 +63,7 @@ with three layers used for context extension. **Crucially, LongLLaMA is able to
63
 
64
  <div align="center">
65
 
66
- | | [LongLLaMA-3B](https://huggingface.co/syzymon/long_llama_3b) | [LongLLaMA-3Bv1.1](https://huggingface.co/syzymon/long_llama_3b_v1_1) | LongLLaMA-7B<br />*(coming soon)*| LongLLaMA-13B<br />*(coming soon)*|
67
  |----------------|----------|----------|-----------|-----------|
68
  | Source model | [OpenLLaMA-3B](https://huggingface.co/openlm-research/open_llama_3b_easylm) | [OpenLLaMA-3Bv2](https://huggingface.co/openlm-research/open_llama_3b_v2_easylm) | - | - |
69
 | Source model tokens | 1T | 1T | - | - |
@@ -93,8 +93,8 @@ pip install transformers==4.30 sentencepiece accelerate
93
  import torch
94
  from transformers import LlamaTokenizer, AutoModelForCausalLM
95
 
96
- tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_3b")
97
- model = AutoModelForCausalLM.from_pretrained("syzymon/long_llama_3b",
98
  torch_dtype=torch.float32,
99
  trust_remote_code=True)
100
  ```
@@ -132,9 +132,9 @@ LongLLaMA has several other parameters:
132
  import torch
133
  from transformers import LlamaTokenizer, AutoModelForCausalLM
134
 
135
- tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_3b")
136
  model = AutoModelForCausalLM.from_pretrained(
137
- "syzymon/long_llama_3b", torch_dtype=torch.float32,
138
  mem_layers=[],
139
  mem_dtype='bfloat16',
140
  trust_remote_code=True,
@@ -150,8 +150,8 @@ model = AutoModelForCausalLM.from_pretrained(
150
  from transformers import LlamaTokenizer, LlamaForCausalLM
151
  import torch
152
 
153
- tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_3b")
154
- model = LlamaForCausalLM.from_pretrained("syzymon/long_llama_3b", torch_dtype=torch.float32)
155
  ```
156
 
157
 
 
63
 
64
  <div align="center">
65
 
66
+ | | [LongLLaMA-3B](https://huggingface.co/syzymon/long_llama_3b_instruct) | [LongLLaMA-3Bv1.1](https://huggingface.co/syzymon/long_llama_3b_v1_1) | LongLLaMA-7B<br />*(coming soon)*| LongLLaMA-13B<br />*(coming soon)*|
67
  |----------------|----------|----------|-----------|-----------|
68
  | Source model | [OpenLLaMA-3B](https://huggingface.co/openlm-research/open_llama_3b_easylm) | [OpenLLaMA-3Bv2](https://huggingface.co/openlm-research/open_llama_3b_v2_easylm) | - | - |
69
 | Source model tokens | 1T | 1T | - | - |
 
93
  import torch
94
  from transformers import LlamaTokenizer, AutoModelForCausalLM
95
 
96
+ tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_3b_instruct")
97
+ model = AutoModelForCausalLM.from_pretrained("syzymon/long_llama_3b_instruct",
98
  torch_dtype=torch.float32,
99
  trust_remote_code=True)
100
  ```
 
132
  import torch
133
  from transformers import LlamaTokenizer, AutoModelForCausalLM
134
 
135
+ tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_3b_instruct")
136
  model = AutoModelForCausalLM.from_pretrained(
137
+ "syzymon/long_llama_3b_instruct", torch_dtype=torch.float32,
138
  mem_layers=[],
139
  mem_dtype='bfloat16',
140
  trust_remote_code=True,
 
150
  from transformers import LlamaTokenizer, LlamaForCausalLM
151
  import torch
152
 
153
+ tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_3b_instruct")
154
+ model = LlamaForCausalLM.from_pretrained("syzymon/long_llama_3b_instruct", torch_dtype=torch.float32)
155
  ```
156
 
157