tangchangli committed on
Commit
6687492
·
1 Parent(s): a5bb810

chore: change to 7b model

Browse files
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: SALMONN Gradio
3
  emoji: ⚡
4
  colorFrom: gray
5
  colorTo: green
 
1
  ---
2
+ title: SALMONN 7B Gradio
3
  emoji: ⚡
4
  colorFrom: gray
5
  colorTo: green
app.py CHANGED
@@ -23,14 +23,15 @@ class ff:
23
 
24
  parser = argparse.ArgumentParser()
25
  parser.add_argument("--device", type=str, default="cuda:0")
26
- parser.add_argument("--ckpt_path", type=str, default="./salmonn_v1.pth")
27
  parser.add_argument("--whisper_path", type=str, default="./whisper_large_v2")
28
  parser.add_argument("--beats_path", type=str, default="./beats/BEATs_iter3_plus_AS2M_finetuned_on_AS2M_cpt2.pt")
29
- parser.add_argument("--vicuna_path", type=str, default="./vicuna.13b")
30
  parser.add_argument("--low_resource", action='store_true', default=False)
31
  parser.add_argument("--port", default=9527)
32
 
33
  args = parser.parse_args()
 
34
  # model = ff()
35
  model = SALMONN(
36
  ckpt=args.ckpt_path,
@@ -84,7 +85,7 @@ def gradio_answer(chatbot, chat_state, num_beams, temperature, top_p):
84
 
85
  title = """<h1 align="center">SALMONN: Speech Audio Language Music Open Neural Network</h1>"""
86
  image_src = """<h1 align="center"><a href="https://github.com/bytedance/SALMONN"><img src="https://raw.githubusercontent.com/bytedance/SALMONN/main/resource/salmon.png", alt="SALMONN" border="0" style="margin: 0 auto; height: 200px;" /></a> </h1>"""
87
- description = """<h3>This is the demo of SALMONN. Upload your audio and start chatting!</h3>"""
88
 
89
 
90
  with gr.Blocks() as demo:
@@ -128,7 +129,7 @@ with gr.Blocks() as demo:
128
  with gr.Column():
129
  chat_state = gr.State([])
130
 
131
- chatbot = gr.Chatbot(label='SALMONN')
132
  text_input = gr.Textbox(label='User', placeholder='Please upload your audio first', interactive=False)
133
 
134
  with gr.Row():
@@ -139,7 +140,7 @@ with gr.Blocks() as demo:
139
  ["resource/audio_demo/gunshots.wav", "Provide the phonetic transcription for the speech."],
140
  ["resource/audio_demo/gunshots.wav", "Please describe the audio."],
141
  ["resource/audio_demo/gunshots.wav", "Recognize what the speaker says and describe the background audio at the same time."],
142
- ["resource/audio_demo/gunshots.wav", "Please answer the speaker's question in detail based on the background sound."],
143
  ["resource/audio_demo/duck.wav", "Please list each event in the audio in order."],
144
  ["resource/audio_demo/duck.wav", "Based on the audio, write a story in detail. Your story should be highly related to the audio."],
145
  ["resource/audio_demo/duck.wav", "How many speakers did you hear in this audio? Who are they?"],
 
23
 
24
  parser = argparse.ArgumentParser()
25
  parser.add_argument("--device", type=str, default="cuda:0")
26
+ parser.add_argument("--ckpt_path", type=str, default="./salmonn_7b_v0.pth")
27
  parser.add_argument("--whisper_path", type=str, default="./whisper_large_v2")
28
  parser.add_argument("--beats_path", type=str, default="./beats/BEATs_iter3_plus_AS2M_finetuned_on_AS2M_cpt2.pt")
29
+ parser.add_argument("--vicuna_path", type=str, default="./vicuna-7b-v1.5")
30
  parser.add_argument("--low_resource", action='store_true', default=False)
31
  parser.add_argument("--port", default=9527)
32
 
33
  args = parser.parse_args()
34
+ args.low_resource = True # for huggingface A10 7b demo
35
  # model = ff()
36
  model = SALMONN(
37
  ckpt=args.ckpt_path,
 
85
 
86
  title = """<h1 align="center">SALMONN: Speech Audio Language Music Open Neural Network</h1>"""
87
  image_src = """<h1 align="center"><a href="https://github.com/bytedance/SALMONN"><img src="https://raw.githubusercontent.com/bytedance/SALMONN/main/resource/salmon.png", alt="SALMONN" border="0" style="margin: 0 auto; height: 200px;" /></a> </h1>"""
88
+ description = """<h3>This is the demo of SALMONN-7B. To experience SALMONN-13B, you can go to https://bytedance.github.io/SALMONN. Upload your audio and start chatting!</h3>"""
89
 
90
 
91
  with gr.Blocks() as demo:
 
129
  with gr.Column():
130
  chat_state = gr.State([])
131
 
132
+ chatbot = gr.Chatbot(label='SALMONN-7B')
133
  text_input = gr.Textbox(label='User', placeholder='Please upload your audio first', interactive=False)
134
 
135
  with gr.Row():
 
140
  ["resource/audio_demo/gunshots.wav", "Provide the phonetic transcription for the speech."],
141
  ["resource/audio_demo/gunshots.wav", "Please describe the audio."],
142
  ["resource/audio_demo/gunshots.wav", "Recognize what the speaker says and describe the background audio at the same time."],
143
+ ["resource/audio_demo/gunshots.wav", "Use your strong reasoning skills to answer the speaker's question in detail based on the background sound."],
144
  ["resource/audio_demo/duck.wav", "Please list each event in the audio in order."],
145
  ["resource/audio_demo/duck.wav", "Based on the audio, write a story in detail. Your story should be highly related to the audio."],
146
  ["resource/audio_demo/duck.wav", "How many speakers did you hear in this audio? Who are they?"],
salmonn_v1.pth → salmonn_7b_v0.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:709c665b25ef05b48985584ec31d6f15018b754abf47b9c33ed9a278285bbae0
3
- size 400466533
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cb2782495b2e3f487222763a30b53b02f727d49059201cc5fa88a7a1fd9dff9
3
+ size 362638989
vicuna-7b-v1.5 ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit de56c35b1763eaae20f4d60efd64af0a9091ebe5
vicuna.13b/LICENSE_vicuna DELETED
@@ -1,201 +0,0 @@
1
- Apache License
2
- Version 2.0, January 2004
3
- http://www.apache.org/licenses/
4
-
5
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
-
7
- 1. Definitions.
8
-
9
- "License" shall mean the terms and conditions for use, reproduction,
10
- and distribution as defined by Sections 1 through 9 of this document.
11
-
12
- "Licensor" shall mean the copyright owner or entity authorized by
13
- the copyright owner that is granting the License.
14
-
15
- "Legal Entity" shall mean the union of the acting entity and all
16
- other entities that control, are controlled by, or are under common
17
- control with that entity. For the purposes of this definition,
18
- "control" means (i) the power, direct or indirect, to cause the
19
- direction or management of such entity, whether by contract or
20
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
- outstanding shares, or (iii) beneficial ownership of such entity.
22
-
23
- "You" (or "Your") shall mean an individual or Legal Entity
24
- exercising permissions granted by this License.
25
-
26
- "Source" form shall mean the preferred form for making modifications,
27
- including but not limited to software source code, documentation
28
- source, and configuration files.
29
-
30
- "Object" form shall mean any form resulting from mechanical
31
- transformation or translation of a Source form, including but
32
- not limited to compiled object code, generated documentation,
33
- and conversions to other media types.
34
-
35
- "Work" shall mean the work of authorship, whether in Source or
36
- Object form, made available under the License, as indicated by a
37
- copyright notice that is included in or attached to the work
38
- (an example is provided in the Appendix below).
39
-
40
- "Derivative Works" shall mean any work, whether in Source or Object
41
- form, that is based on (or derived from) the Work and for which the
42
- editorial revisions, annotations, elaborations, or other modifications
43
- represent, as a whole, an original work of authorship. For the purposes
44
- of this License, Derivative Works shall not include works that remain
45
- separable from, or merely link (or bind by name) to the interfaces of,
46
- the Work and Derivative Works thereof.
47
-
48
- "Contribution" shall mean any work of authorship, including
49
- the original version of the Work and any modifications or additions
50
- to that Work or Derivative Works thereof, that is intentionally
51
- submitted to Licensor for inclusion in the Work by the copyright owner
52
- or by an individual or Legal Entity authorized to submit on behalf of
53
- the copyright owner. For the purposes of this definition, "submitted"
54
- means any form of electronic, verbal, or written communication sent
55
- to the Licensor or its representatives, including but not limited to
56
- communication on electronic mailing lists, source code control systems,
57
- and issue tracking systems that are managed by, or on behalf of, the
58
- Licensor for the purpose of discussing and improving the Work, but
59
- excluding communication that is conspicuously marked or otherwise
60
- designated in writing by the copyright owner as "Not a Contribution."
61
-
62
- "Contributor" shall mean Licensor and any individual or Legal Entity
63
- on behalf of whom a Contribution has been received by Licensor and
64
- subsequently incorporated within the Work.
65
-
66
- 2. Grant of Copyright License. Subject to the terms and conditions of
67
- this License, each Contributor hereby grants to You a perpetual,
68
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
- copyright license to reproduce, prepare Derivative Works of,
70
- publicly display, publicly perform, sublicense, and distribute the
71
- Work and such Derivative Works in Source or Object form.
72
-
73
- 3. Grant of Patent License. Subject to the terms and conditions of
74
- this License, each Contributor hereby grants to You a perpetual,
75
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
- (except as stated in this section) patent license to make, have made,
77
- use, offer to sell, sell, import, and otherwise transfer the Work,
78
- where such license applies only to those patent claims licensable
79
- by such Contributor that are necessarily infringed by their
80
- Contribution(s) alone or by combination of their Contribution(s)
81
- with the Work to which such Contribution(s) was submitted. If You
82
- institute patent litigation against any entity (including a
83
- cross-claim or counterclaim in a lawsuit) alleging that the Work
84
- or a Contribution incorporated within the Work constitutes direct
85
- or contributory patent infringement, then any patent licenses
86
- granted to You under this License for that Work shall terminate
87
- as of the date such litigation is filed.
88
-
89
- 4. Redistribution. You may reproduce and distribute copies of the
90
- Work or Derivative Works thereof in any medium, with or without
91
- modifications, and in Source or Object form, provided that You
92
- meet the following conditions:
93
-
94
- (a) You must give any other recipients of the Work or
95
- Derivative Works a copy of this License; and
96
-
97
- (b) You must cause any modified files to carry prominent notices
98
- stating that You changed the files; and
99
-
100
- (c) You must retain, in the Source form of any Derivative Works
101
- that You distribute, all copyright, patent, trademark, and
102
- attribution notices from the Source form of the Work,
103
- excluding those notices that do not pertain to any part of
104
- the Derivative Works; and
105
-
106
- (d) If the Work includes a "NOTICE" text file as part of its
107
- distribution, then any Derivative Works that You distribute must
108
- include a readable copy of the attribution notices contained
109
- within such NOTICE file, excluding those notices that do not
110
- pertain to any part of the Derivative Works, in at least one
111
- of the following places: within a NOTICE text file distributed
112
- as part of the Derivative Works; within the Source form or
113
- documentation, if provided along with the Derivative Works; or,
114
- within a display generated by the Derivative Works, if and
115
- wherever such third-party notices normally appear. The contents
116
- of the NOTICE file are for informational purposes only and
117
- do not modify the License. You may add Your own attribution
118
- notices within Derivative Works that You distribute, alongside
119
- or as an addendum to the NOTICE text from the Work, provided
120
- that such additional attribution notices cannot be construed
121
- as modifying the License.
122
-
123
- You may add Your own copyright statement to Your modifications and
124
- may provide additional or different license terms and conditions
125
- for use, reproduction, or distribution of Your modifications, or
126
- for any such Derivative Works as a whole, provided Your use,
127
- reproduction, and distribution of the Work otherwise complies with
128
- the conditions stated in this License.
129
-
130
- 5. Submission of Contributions. Unless You explicitly state otherwise,
131
- any Contribution intentionally submitted for inclusion in the Work
132
- by You to the Licensor shall be under the terms and conditions of
133
- this License, without any additional terms or conditions.
134
- Notwithstanding the above, nothing herein shall supersede or modify
135
- the terms of any separate license agreement you may have executed
136
- with Licensor regarding such Contributions.
137
-
138
- 6. Trademarks. This License does not grant permission to use the trade
139
- names, trademarks, service marks, or product names of the Licensor,
140
- except as required for reasonable and customary use in describing the
141
- origin of the Work and reproducing the content of the NOTICE file.
142
-
143
- 7. Disclaimer of Warranty. Unless required by applicable law or
144
- agreed to in writing, Licensor provides the Work (and each
145
- Contributor provides its Contributions) on an "AS IS" BASIS,
146
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
- implied, including, without limitation, any warranties or conditions
148
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
- PARTICULAR PURPOSE. You are solely responsible for determining the
150
- appropriateness of using or redistributing the Work and assume any
151
- risks associated with Your exercise of permissions under this License.
152
-
153
- 8. Limitation of Liability. In no event and under no legal theory,
154
- whether in tort (including negligence), contract, or otherwise,
155
- unless required by applicable law (such as deliberate and grossly
156
- negligent acts) or agreed to in writing, shall any Contributor be
157
- liable to You for damages, including any direct, indirect, special,
158
- incidental, or consequential damages of any character arising as a
159
- result of this License or out of the use or inability to use the
160
- Work (including but not limited to damages for loss of goodwill,
161
- work stoppage, computer failure or malfunction, or any and all
162
- other commercial damages or losses), even if such Contributor
163
- has been advised of the possibility of such damages.
164
-
165
- 9. Accepting Warranty or Additional Liability. While redistributing
166
- the Work or Derivative Works thereof, You may choose to offer,
167
- and charge a fee for, acceptance of support, warranty, indemnity,
168
- or other liability obligations and/or rights consistent with this
169
- License. However, in accepting such obligations, You may act only
170
- on Your own behalf and on Your sole responsibility, not on behalf
171
- of any other Contributor, and only if You agree to indemnify,
172
- defend, and hold each Contributor harmless for any liability
173
- incurred by, or claims asserted against, such Contributor by reason
174
- of your accepting any such warranty or additional liability.
175
-
176
- END OF TERMS AND CONDITIONS
177
-
178
- APPENDIX: How to apply the Apache License to your work.
179
-
180
- To apply the Apache License to your work, attach the following
181
- boilerplate notice, with the fields enclosed by brackets "[]"
182
- replaced with your own identifying information. (Don't include
183
- the brackets!) The text should be enclosed in the appropriate
184
- comment syntax for the file format. We also recommend that a
185
- file or class name and description of purpose be included on the
186
- same "printed page" as the copyright notice for easier
187
- identification within third-party archives.
188
-
189
- Copyright [yyyy] [name of copyright owner]
190
-
191
- Licensed under the Apache License, Version 2.0 (the "License");
192
- you may not use this file except in compliance with the License.
193
- You may obtain a copy of the License at
194
-
195
- http://www.apache.org/licenses/LICENSE-2.0
196
-
197
- Unless required by applicable law or agreed to in writing, software
198
- distributed under the License is distributed on an "AS IS" BASIS,
199
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
- See the License for the specific language governing permissions and
201
- limitations under the License.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vicuna.13b/config.json DELETED
@@ -1,23 +0,0 @@
1
- {
2
- "_name_or_path": "../LLM.ckpts/llama.13b/",
3
- "architectures": [
4
- "LlamaForCausalLM"
5
- ],
6
- "bos_token_id": 1,
7
- "eos_token_id": 2,
8
- "hidden_act": "silu",
9
- "hidden_size": 5120,
10
- "initializer_range": 0.02,
11
- "intermediate_size": 13824,
12
- "max_position_embeddings": 2048,
13
- "model_type": "llama",
14
- "num_attention_heads": 40,
15
- "num_hidden_layers": 40,
16
- "pad_token_id": 0,
17
- "rms_norm_eps": 1e-06,
18
- "tie_word_embeddings": false,
19
- "torch_dtype": "float16",
20
- "transformers_version": "4.28.1",
21
- "use_cache": true,
22
- "vocab_size": 32000
23
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vicuna.13b/generation_config.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "bos_token_id": 1,
4
- "eos_token_id": 2,
5
- "pad_token_id": 0,
6
- "transformers_version": "4.28.1"
7
- }
 
 
 
 
 
 
 
 
vicuna.13b/pytorch_model-00001-of-00003.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e754ec47918eb6569468a1fbdc68ee376202eb4e34c97a05951d894e195d296
3
- size 9948728430
 
 
 
 
vicuna.13b/pytorch_model-00002-of-00003.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:eecea1120efcd762af48bf54d7d5ff9ef3128cc33f144533dfc5a926fb6c541c
3
- size 9904165024
 
 
 
 
vicuna.13b/pytorch_model-00003-of-00003.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf1ed63a11c0d9176006fe49914eaa911f0e73c2aaf614c11f8534ec934d7a89
3
- size 6506663689
 
 
 
 
vicuna.13b/pytorch_model.bin.index.json DELETED
@@ -1,410 +0,0 @@
1
- {
2
- "metadata": {
3
- "total_size": 26031738880
4
- },
5
- "weight_map": {
6
- "lm_head.weight": "pytorch_model-00003-of-00003.bin",
7
- "model.embed_tokens.weight": "pytorch_model-00001-of-00003.bin",
8
- "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
9
- "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
10
- "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
11
- "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
12
- "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
13
- "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
14
- "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
15
- "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
16
- "model.layers.0.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
17
- "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
18
- "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
19
- "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
20
- "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
21
- "model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
22
- "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
23
- "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
24
- "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
25
- "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
26
- "model.layers.1.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
27
- "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
28
- "model.layers.10.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
29
- "model.layers.10.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
30
- "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
31
- "model.layers.10.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
32
- "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
33
- "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
34
- "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
35
- "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
36
- "model.layers.10.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
37
- "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
38
- "model.layers.11.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
39
- "model.layers.11.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
40
- "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
41
- "model.layers.11.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
42
- "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
43
- "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
44
- "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
45
- "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
46
- "model.layers.11.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
47
- "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
48
- "model.layers.12.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
49
- "model.layers.12.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
50
- "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
51
- "model.layers.12.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
52
- "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
53
- "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
54
- "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
55
- "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
56
- "model.layers.12.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
57
- "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
58
- "model.layers.13.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
59
- "model.layers.13.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
60
- "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
61
- "model.layers.13.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
62
- "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
63
- "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
64
- "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
65
- "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
66
- "model.layers.13.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
67
- "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
68
- "model.layers.14.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
69
- "model.layers.14.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
70
- "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
71
- "model.layers.14.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
72
- "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
73
- "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
74
- "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
75
- "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
76
- "model.layers.14.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
77
- "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
78
- "model.layers.15.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
79
- "model.layers.15.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
80
- "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
81
- "model.layers.15.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
82
- "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
83
- "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
84
- "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
85
- "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
86
- "model.layers.15.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
87
- "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
88
- "model.layers.16.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
89
- "model.layers.16.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
90
- "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
91
- "model.layers.16.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
92
- "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
93
- "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
94
- "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
95
- "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
96
- "model.layers.16.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
97
- "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
98
- "model.layers.17.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
99
- "model.layers.17.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
100
- "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
101
- "model.layers.17.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
102
- "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
103
- "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
104
- "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
105
- "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
106
- "model.layers.17.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
107
- "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
108
- "model.layers.18.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
109
- "model.layers.18.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
110
- "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
111
- "model.layers.18.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
112
- "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
113
- "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
114
- "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
115
- "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
116
- "model.layers.18.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
117
- "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
118
- "model.layers.19.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
119
- "model.layers.19.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
120
- "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
121
- "model.layers.19.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
122
- "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
123
- "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
124
- "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
125
- "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
126
- "model.layers.19.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
127
- "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
128
- "model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
129
- "model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
130
- "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
131
- "model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
132
- "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
133
- "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
134
- "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
135
- "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
136
- "model.layers.2.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
137
- "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
138
- "model.layers.20.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
139
- "model.layers.20.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
140
- "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
141
- "model.layers.20.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
142
- "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
143
- "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
144
- "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
145
- "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
146
- "model.layers.20.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
147
- "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
148
- "model.layers.21.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
149
- "model.layers.21.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
150
- "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
151
- "model.layers.21.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
152
- "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
153
- "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
154
- "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
155
- "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
156
- "model.layers.21.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
157
- "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
158
- "model.layers.22.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
159
- "model.layers.22.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
160
- "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
161
- "model.layers.22.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
162
- "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
163
- "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
164
- "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
165
- "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
166
- "model.layers.22.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
167
- "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
168
- "model.layers.23.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
169
- "model.layers.23.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
170
- "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
171
- "model.layers.23.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
172
- "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
173
- "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
174
- "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
175
- "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
176
- "model.layers.23.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
177
- "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
178
- "model.layers.24.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
179
- "model.layers.24.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
180
- "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
181
- "model.layers.24.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
182
- "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
183
- "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
184
- "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
185
- "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
186
- "model.layers.24.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
187
- "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
188
- "model.layers.25.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
189
- "model.layers.25.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
190
- "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
191
- "model.layers.25.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
192
- "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
193
- "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
194
- "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
195
- "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
196
- "model.layers.25.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
197
- "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
198
- "model.layers.26.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
199
- "model.layers.26.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
200
- "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
201
- "model.layers.26.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
202
- "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
203
- "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
204
- "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
205
- "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
206
- "model.layers.26.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
207
- "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
208
- "model.layers.27.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
209
- "model.layers.27.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
210
- "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
211
- "model.layers.27.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
212
- "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
213
- "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
214
- "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
215
- "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
216
- "model.layers.27.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
217
- "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
218
- "model.layers.28.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
219
- "model.layers.28.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
220
- "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
221
- "model.layers.28.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
222
- "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
223
- "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
224
- "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
225
- "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
226
- "model.layers.28.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
227
- "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
228
- "model.layers.29.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
229
- "model.layers.29.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
230
- "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
231
- "model.layers.29.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin",
232
- "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
233
- "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
234
- "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
235
- "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
236
- "model.layers.29.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
237
- "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
238
- "model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
239
- "model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
240
- "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
241
- "model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
242
- "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
243
- "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
244
- "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
245
- "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
246
- "model.layers.3.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
247
- "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
248
- "model.layers.30.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
249
- "model.layers.30.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin",
250
- "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin",
251
- "model.layers.30.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
252
- "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
253
- "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin",
254
- "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin",
255
- "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin",
256
- "model.layers.30.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin",
257
- "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin",
258
- "model.layers.31.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
259
- "model.layers.31.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
260
- "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
261
- "model.layers.31.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
262
- "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
263
- "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
264
- "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
265
- "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
266
- "model.layers.31.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
267
- "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
268
- "model.layers.32.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
269
- "model.layers.32.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
270
- "model.layers.32.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
271
- "model.layers.32.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
272
- "model.layers.32.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
273
- "model.layers.32.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
274
- "model.layers.32.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
275
- "model.layers.32.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
276
- "model.layers.32.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
277
- "model.layers.32.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
278
- "model.layers.33.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
279
- "model.layers.33.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
280
- "model.layers.33.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
281
- "model.layers.33.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
282
- "model.layers.33.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
283
- "model.layers.33.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
284
- "model.layers.33.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
285
- "model.layers.33.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
286
- "model.layers.33.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
287
- "model.layers.33.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
288
- "model.layers.34.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
289
- "model.layers.34.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
290
- "model.layers.34.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
291
- "model.layers.34.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
292
- "model.layers.34.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
293
- "model.layers.34.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
294
- "model.layers.34.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
295
- "model.layers.34.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
296
- "model.layers.34.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
297
- "model.layers.34.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
298
- "model.layers.35.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
299
- "model.layers.35.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
300
- "model.layers.35.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
301
- "model.layers.35.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
302
- "model.layers.35.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
303
- "model.layers.35.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
304
- "model.layers.35.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
305
- "model.layers.35.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
306
- "model.layers.35.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
307
- "model.layers.35.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
308
- "model.layers.36.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
309
- "model.layers.36.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
310
- "model.layers.36.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
311
- "model.layers.36.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
312
- "model.layers.36.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
313
- "model.layers.36.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
314
- "model.layers.36.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
315
- "model.layers.36.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
316
- "model.layers.36.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
317
- "model.layers.36.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
318
- "model.layers.37.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
319
- "model.layers.37.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
320
- "model.layers.37.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
321
- "model.layers.37.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
322
- "model.layers.37.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
323
- "model.layers.37.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
324
- "model.layers.37.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
325
- "model.layers.37.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
326
- "model.layers.37.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
327
- "model.layers.37.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
328
- "model.layers.38.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
329
- "model.layers.38.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
330
- "model.layers.38.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
331
- "model.layers.38.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
332
- "model.layers.38.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
333
- "model.layers.38.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
334
- "model.layers.38.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
335
- "model.layers.38.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
336
- "model.layers.38.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
337
- "model.layers.38.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
338
- "model.layers.39.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
339
- "model.layers.39.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin",
340
- "model.layers.39.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin",
341
- "model.layers.39.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin",
342
- "model.layers.39.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
343
- "model.layers.39.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin",
344
- "model.layers.39.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin",
345
- "model.layers.39.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin",
346
- "model.layers.39.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin",
347
- "model.layers.39.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin",
348
- "model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
349
- "model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
350
- "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
351
- "model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
352
- "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
353
- "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
354
- "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
355
- "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
356
- "model.layers.4.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
357
- "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
358
- "model.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
359
- "model.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
360
- "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
361
- "model.layers.5.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
362
- "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
363
- "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
364
- "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
365
- "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
366
- "model.layers.5.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
367
- "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
368
- "model.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
369
- "model.layers.6.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
370
- "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
371
- "model.layers.6.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
372
- "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
373
- "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
374
- "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
375
- "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
376
- "model.layers.6.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
377
- "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
378
- "model.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
379
- "model.layers.7.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
380
- "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
381
- "model.layers.7.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
382
- "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
383
- "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
384
- "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
385
- "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
386
- "model.layers.7.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
387
- "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
388
- "model.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
389
- "model.layers.8.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
390
- "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
391
- "model.layers.8.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
392
- "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
393
- "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
394
- "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
395
- "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
396
- "model.layers.8.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
397
- "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
398
- "model.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
399
- "model.layers.9.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin",
400
- "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin",
401
- "model.layers.9.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin",
402
- "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
403
- "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin",
404
- "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin",
405
- "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin",
406
- "model.layers.9.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin",
407
- "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin",
408
- "model.norm.weight": "pytorch_model-00003-of-00003.bin"
409
- }
410
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vicuna.13b/special_tokens_map.json DELETED
@@ -1,23 +0,0 @@
1
- {
2
- "bos_token": {
3
- "content": "<s>",
4
- "lstrip": false,
5
- "normalized": true,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "eos_token": {
10
- "content": "</s>",
11
- "lstrip": false,
12
- "normalized": true,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "unk_token": {
17
- "content": "<unk>",
18
- "lstrip": false,
19
- "normalized": true,
20
- "rstrip": false,
21
- "single_word": false
22
- }
23
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vicuna.13b/tokenizer.model DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
- size 499723
 
 
 
 
vicuna.13b/tokenizer_config.json DELETED
@@ -1,33 +0,0 @@
1
- {
2
- "add_bos_token": true,
3
- "add_eos_token": false,
4
- "bos_token": {
5
- "__type": "AddedToken",
6
- "content": "<s>",
7
- "lstrip": false,
8
- "normalized": true,
9
- "rstrip": false,
10
- "single_word": false
11
- },
12
- "clean_up_tokenization_spaces": false,
13
- "eos_token": {
14
- "__type": "AddedToken",
15
- "content": "</s>",
16
- "lstrip": false,
17
- "normalized": true,
18
- "rstrip": false,
19
- "single_word": false
20
- },
21
- "model_max_length": 1000000000000000019884624838656,
22
- "pad_token": null,
23
- "sp_model_kwargs": {},
24
- "tokenizer_class": "LlamaTokenizer",
25
- "unk_token": {
26
- "__type": "AddedToken",
27
- "content": "<unk>",
28
- "lstrip": false,
29
- "normalized": true,
30
- "rstrip": false,
31
- "single_word": false
32
- }
33
- }