Add chatgpt alpaca eval

Browse files

Files changed (5) hide show

alpaca_eval/chatgpt_fn_--phi-2-alpaca-gpt4-dpo/alpaca_eval_log.txt +632 -0
alpaca_eval/chatgpt_fn_--phi-2-alpaca-gpt4-dpo/annotation_chatgpt_fn.json +0 -0
alpaca_eval/chatgpt_fn_--phi-2-alpaca-gpt4-dpo/leaderboard.csv +14 -0
alpaca_eval/chatgpt_fn_--phi-2-alpaca-gpt4-dpo/model_outputs.json +0 -0
alpaca_eval/chatgpt_fn_--phi-2-alpaca-gpt4-dpo/reference_outputs.json +0 -0

alpaca_eval/chatgpt_fn_--phi-2-alpaca-gpt4-dpo/alpaca_eval_log.txt ADDED Viewed

	@@ -0,0 +1,632 @@

+INFO:root:Evaluating the phi-2-alpaca-gpt4-dpo outputs.
+INFO:root:Creating the annotator from `chatgpt_fn`.
+INFO:root:Saving annotations to `/home/hangyu5/Documents/Git-repoMy/AIResearchVault/repo/LLM-infrastructure/alpaca_eval/src/alpaca_eval/evaluators_configs/chatgpt_fn/annotations_seed0_configs.json`.
+INFO:root:Loading all annotations from /home/hangyu5/Documents/Git-repoMy/AIResearchVault/repo/LLM-infrastructure/alpaca_eval/src/alpaca_eval/evaluators_configs/chatgpt_fn/annotations_seed0_configs.json.
+https://api.openai-proxy.org/v1
+INFO:root:Saving all annotations to /home/hangyu5/Documents/Git-repoMy/AIResearchVault/repo/LLM-infrastructure/alpaca_eval/src/alpaca_eval/evaluators_configs/chatgpt_fn/annotations_seed0_configs.json.
+INFO:root:Loading all annotations from /home/hangyu5/Documents/Git-repoMy/AIResearchVault/repo/LLM-infrastructure/alpaca_eval/src/alpaca_eval/evaluators_configs/chatgpt_fn/annotations_seed0_configs.json.
+INFO:root:Saving all annotations to /home/hangyu5/Documents/Git-repoMy/AIResearchVault/repo/LLM-infrastructure/alpaca_eval/src/alpaca_eval/evaluators_configs/chatgpt_fn/annotations_seed0_configs.json.
+INFO:root:Loading all annotations from /home/hangyu5/Documents/Git-repoMy/AIResearchVault/repo/LLM-infrastructure/alpaca_eval/src/alpaca_eval/evaluators_configs/chatgpt_fn/annotations_seed0_configs.json.
+INFO:root:Saving all annotations to /home/hangyu5/Documents/Git-repoMy/AIResearchVault/repo/LLM-infrastructure/alpaca_eval/src/alpaca_eval/evaluators_configs/chatgpt_fn/annotations_seed0_configs.json.
+INFO:root:Loading all annotations from /home/hangyu5/Documents/Git-repoMy/AIResearchVault/repo/LLM-infrastructure/alpaca_eval/src/alpaca_eval/evaluators_configs/chatgpt_fn/annotations_seed0_configs.json.
+INFO:root:Using `openai_completions` on 64 prompts using gpt-3.5-turbo-16k-0613.
+INFO:root:Kwargs to completion: {'n': 1, 'model': 'gpt-3.5-turbo-16k-0613', 'is_chat': True, 'temperature': 0, 'function_call': {'name': 'print_best_model'}, 'functions': [{'name': 'print_best_model', 'description': 'Print the best model given the preferred output.', 'parameters': {'type': 'object', 'properties': {'best_output': {'type': 'string', 'description': "Name of the best output, should be 'Output (a)' or 'Output (b)'"}}}, 'required': ['best_output']}]}. num_procs=5
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:root:Completed 64 examples in 17.3 seconds.
+INFO:root:Saving all annotations to /home/hangyu5/Documents/Git-repoMy/AIResearchVault/repo/LLM-infrastructure/alpaca_eval/src/alpaca_eval/evaluators_configs/chatgpt_fn/annotations_seed0_configs.json.
+INFO:root:Loading all annotations from /home/hangyu5/Documents/Git-repoMy/AIResearchVault/repo/LLM-infrastructure/alpaca_eval/src/alpaca_eval/evaluators_configs/chatgpt_fn/annotations_seed0_configs.json.
+INFO:root:Using `openai_completions` on 128 prompts using gpt-3.5-turbo-16k-0613.
+INFO:root:Kwargs to completion: {'n': 1, 'model': 'gpt-3.5-turbo-16k-0613', 'is_chat': True, 'temperature': 0, 'function_call': {'name': 'print_best_model'}, 'functions': [{'name': 'print_best_model', 'description': 'Print the best model given the preferred output.', 'parameters': {'type': 'object', 'properties': {'best_output': {'type': 'string', 'description': "Name of the best output, should be 'Output (a)' or 'Output (b)'"}}}, 'required': ['best_output']}]}. num_procs=5
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:root:Completed 128 examples in 34.5 seconds.
+INFO:root:Saving all annotations to /home/hangyu5/Documents/Git-repoMy/AIResearchVault/repo/LLM-infrastructure/alpaca_eval/src/alpaca_eval/evaluators_configs/chatgpt_fn/annotations_seed0_configs.json.
+INFO:root:Loading all annotations from /home/hangyu5/Documents/Git-repoMy/AIResearchVault/repo/LLM-infrastructure/alpaca_eval/src/alpaca_eval/evaluators_configs/chatgpt_fn/annotations_seed0_configs.json.
+INFO:root:Using `openai_completions` on 127 prompts using gpt-3.5-turbo-16k-0613.
+INFO:root:Kwargs to completion: {'n': 1, 'model': 'gpt-3.5-turbo-16k-0613', 'is_chat': True, 'temperature': 0, 'function_call': {'name': 'print_best_model'}, 'functions': [{'name': 'print_best_model', 'description': 'Print the best model given the preferred output.', 'parameters': {'type': 'object', 'properties': {'best_output': {'type': 'string', 'description': "Name of the best output, should be 'Output (a)' or 'Output (b)'"}}}, 'required': ['best_output']}]}. num_procs=5
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:root:Completed 127 examples in 33.9 seconds.
+INFO:root:Saving all annotations to /home/hangyu5/Documents/Git-repoMy/AIResearchVault/repo/LLM-infrastructure/alpaca_eval/src/alpaca_eval/evaluators_configs/chatgpt_fn/annotations_seed0_configs.json.
+INFO:root:Loading all annotations from /home/hangyu5/Documents/Git-repoMy/AIResearchVault/repo/LLM-infrastructure/alpaca_eval/src/alpaca_eval/evaluators_configs/chatgpt_fn/annotations_seed0_configs.json.
+INFO:root:Using `openai_completions` on 37 prompts using gpt-3.5-turbo-16k-0613.
+INFO:root:Kwargs to completion: {'n': 1, 'model': 'gpt-3.5-turbo-16k-0613', 'is_chat': True, 'temperature': 0, 'function_call': {'name': 'print_best_model'}, 'functions': [{'name': 'print_best_model', 'description': 'Print the best model given the preferred output.', 'parameters': {'type': 'object', 'properties': {'best_output': {'type': 'string', 'description': "Name of the best output, should be 'Output (a)' or 'Output (b)'"}}}, 'required': ['best_output']}]}. num_procs=5
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:httpx:HTTP Request: POST https://api.openai-proxy.org/v1/chat/completions "HTTP/1.1 200 OK"
+INFO:root:Completed 37 examples in 10.7 seconds.
+INFO:root:Saving all annotations to /home/hangyu5/Documents/Git-repoMy/AIResearchVault/repo/LLM-infrastructure/alpaca_eval/src/alpaca_eval/evaluators_configs/chatgpt_fn/annotations_seed0_configs.json.
+INFO:root:Loading all annotations from /home/hangyu5/Documents/Git-repoMy/AIResearchVault/repo/LLM-infrastructure/alpaca_eval/src/alpaca_eval/evaluators_configs/chatgpt_fn/annotations_seed0_configs.json.
+INFO:root:drop 1 outputs that are not[0, 1, 2]
+INFO:root:Saving all results to output/chatgpt_fn_--phi-2-alpaca-gpt4-dpo-eval
+INFO:root:Saving result to the precomputed leaderboard at /home/hangyu5/Documents/Git-repoMy/AIResearchVault/repo/LLM-infrastructure/alpaca_eval/src/alpaca_eval/leaderboards/data_AlpacaEval/chatgpt_fn_leaderboard.csv
+                       win_rate  standard_error  n_total  avg_length
+gpt4                      73.79            1.54      805        1365
+claude                    70.37            1.60      805        1082
+chatgpt                   66.09            1.66      805         811
+wizardlm-13b              65.16            1.67      805         985
+vicuna-13b                64.10            1.69      805        1037
+guanaco-65b               62.36            1.71      805        1249
+oasst-rlhf-llama-33b      62.05            1.71      805        1079
+alpaca-farm-ppo-human     60.25            1.72      805         803
+falcon-40b-instruct       56.52            1.74      805         662
+phi-2-alpaca-gpt4-dpo     55.60            1.75      804        4532
+text_davinci_003          50.00            0.00      805         307
+alpaca-7b                 45.22            1.74      805         396
+text_davinci_001          28.07            1.56      805         296

alpaca_eval/chatgpt_fn_--phi-2-alpaca-gpt4-dpo/annotation_chatgpt_fn.json ADDED Viewed

The diff for this file is too large to render. See raw diff

alpaca_eval/chatgpt_fn_--phi-2-alpaca-gpt4-dpo/leaderboard.csv ADDED Viewed

	@@ -0,0 +1,14 @@

+,win_rate,standard_error,n_wins,n_wins_base,n_draws,n_total,mode,avg_length
+gpt4,73.7888198757764,1.5359801545073597,588,205,12,805,minimal,1365
+claude,70.37267080745342,1.599519507147828,562,234,9,805,minimal,1082
+chatgpt,66.08695652173913,1.6626479994330317,529,270,6,805,minimal,811
+wizardlm-13b,65.15527950310559,1.670034107787565,520,276,9,805,minimal,985
+vicuna-13b,64.09937888198758,1.6895185863153146,515,288,2,805,minimal,1037
+guanaco-65b,62.36024844720497,1.7086348811605765,502,303,0,805,minimal,1249
+oasst-rlhf-llama-33b,62.0496894409938,1.7080028976103514,498,304,3,805,minimal,1079
+alpaca-farm-ppo-human,60.24844720496895,1.7169496733548772,481,316,8,805,minimal,803
+falcon-40b-instruct,56.52173913043478,1.7438750520312944,453,348,4,805,minimal,662
+phi-2-alpaca-gpt4-dpo,55.59701492537313,1.7533719245384987,447,357,0,804,community,4532
+text_davinci_003,50.0,0.0,0,0,805,805,minimal,307
+alpaca-7b,45.21739130434783,1.7375846781579476,356,433,16,805,minimal,396
+text_davinci_001,28.07453416149068,1.5602183426587484,216,569,20,805,minimal,296

alpaca_eval/chatgpt_fn_--phi-2-alpaca-gpt4-dpo/model_outputs.json ADDED Viewed

The diff for this file is too large to render. See raw diff

alpaca_eval/chatgpt_fn_--phi-2-alpaca-gpt4-dpo/reference_outputs.json ADDED Viewed

The diff for this file is too large to render. See raw diff