Update README.md
Browse files
README.md
CHANGED
@@ -154,6 +154,15 @@ response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
|
154 |
|
155 |
|
156 |
## Reference
|
157 |
-
|
158 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
|
|
|
154 |
|
155 |
|
156 |
## Reference
|
157 |
+
```
|
158 |
+
@misc{ye2024scalarrewardmodellearning,
|
159 |
+
title={Beyond Scalar Reward Model: Learning Generative Judge from Preference Data},
|
160 |
+
author={Ziyi Ye and Xiangsheng Li and Qiuchi Li and Qingyao Ai and Yujia Zhou and Wei Shen and Dong Yan and Yiqun Liu},
|
161 |
+
year={2024},
|
162 |
+
eprint={2410.03742},
|
163 |
+
archivePrefix={arXiv},
|
164 |
+
primaryClass={cs.CL},
|
165 |
+
url={https://arxiv.org/abs/2410.03742},
|
166 |
+
}
|
167 |
+
```
|
168 |
|