SrCh1nask1 committed on
Commit
d5cf92d
·
verified ·
1 Parent(s): 554a35d

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +95 -1
README.md CHANGED
@@ -61,4 +61,98 @@ pipeline = transformers.pipeline(
61
 
62
  outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
63
  print(outputs[0]["generated_text"])
64
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
63
  print(outputs[0]["generated_text"])
64
+ ```
65
+
66
+
67
+ @misc{open-llm-leaderboard,
68
+ author = {Edward Beeching and Clémentine Fourrier and Nathan Habib and Sheon Han and Nathan Lambert and Nazneen Rajani and Omar Sanseviero and Lewis Tunstall and Thomas Wolf},
69
+ title = {Open LLM Leaderboard},
70
+ year = {2023},
71
+ publisher = {Hugging Face},
72
+ howpublished = "\url{https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard}"
73
+ }
74
+ @software{eval-harness,
75
+ author = {Gao, Leo and
76
+ Tow, Jonathan and
77
+ Biderman, Stella and
78
+ Black, Sid and
79
+ DiPofi, Anthony and
80
+ Foster, Charles and
81
+ Golding, Laurence and
82
+ Hsu, Jeffrey and
83
+ McDonell, Kyle and
84
+ Muennighoff, Niklas and
85
+ Phang, Jason and
86
+ Reynolds, Laria and
87
+ Tang, Eric and
88
+ Thite, Anish and
89
+ Wang, Ben and
90
+ Wang, Kevin and
91
+ Zou, Andy},
92
+ title = {A framework for few-shot language model evaluation},
93
+ month = sep,
94
+ year = 2021,
95
+ publisher = {Zenodo},
96
+ version = {v0.0.1},
97
+ doi = {10.5281/zenodo.5371628},
98
+ url = {https://doi.org/10.5281/zenodo.5371628}
99
+ }
100
+ @misc{clark2018think,
101
+ title={Think you have Solved Question Answering? Try ARC, the AI2 Reasoning Challenge},
102
+ author={Peter Clark and Isaac Cowhey and Oren Etzioni and Tushar Khot and Ashish Sabharwal and Carissa Schoenick and Oyvind Tafjord},
103
+ year={2018},
104
+ eprint={1803.05457},
105
+ archivePrefix={arXiv},
106
+ primaryClass={cs.AI}
107
+ }
108
+ @misc{zellers2019hellaswag,
109
+ title={HellaSwag: Can a Machine Really Finish Your Sentence?},
110
+ author={Rowan Zellers and Ari Holtzman and Yonatan Bisk and Ali Farhadi and Yejin Choi},
111
+ year={2019},
112
+ eprint={1905.07830},
113
+ archivePrefix={arXiv},
114
+ primaryClass={cs.CL}
115
+ }
116
+ @misc{hendrycks2021measuring,
117
+ title={Measuring Massive Multitask Language Understanding},
118
+ author={Dan Hendrycks and Collin Burns and Steven Basart and Andy Zou and Mantas Mazeika and Dawn Song and Jacob Steinhardt},
119
+ year={2021},
120
+ eprint={2009.03300},
121
+ archivePrefix={arXiv},
122
+ primaryClass={cs.CY}
123
+ }
124
+ @misc{lin2022truthfulqa,
125
+ title={TruthfulQA: Measuring How Models Mimic Human Falsehoods},
126
+ author={Stephanie Lin and Jacob Hilton and Owain Evans},
127
+ year={2022},
128
+ eprint={2109.07958},
129
+ archivePrefix={arXiv},
130
+ primaryClass={cs.CL}
131
+ }
132
+ @misc{DBLP:journals/corr/abs-1907-10641,
133
+ title={{WINOGRANDE:} An Adversarial Winograd Schema Challenge at Scale},
134
+ author={Keisuke Sakaguchi and Ronan Le Bras and Chandra Bhagavatula and Yejin Choi},
135
+ year={2019},
136
+ eprint={1907.10641},
137
+ archivePrefix={arXiv},
138
+ primaryClass={cs.CL}
139
+ }
140
+ @misc{DBLP:journals/corr/abs-2110-14168,
141
+ title={Training Verifiers to Solve Math Word Problems},
142
+ author={Karl Cobbe and
143
+ Vineet Kosaraju and
144
+ Mohammad Bavarian and
145
+ Mark Chen and
146
+ Heewoo Jun and
147
+ Lukasz Kaiser and
148
+ Matthias Plappert and
149
+ Jerry Tworek and
150
+ Jacob Hilton and
151
+ Reiichiro Nakano and
152
+ Christopher Hesse and
153
+ John Schulman},
154
+ year={2021},
155
+ eprint={2110.14168},
156
+ archivePrefix={arXiv},
157
+ primaryClass={cs.LG}
158
+ }