zhiminy committed
Commit
9b3a3bd
1 Parent(s): bedcfff

add more models

Files changed (3):
  1. .gitignore +1 -0
  2. app.py +18 -26
  3. context_window.json +31 -21
.gitignore CHANGED
@@ -1,3 +1,4 @@
 *.env
 *.venv
 *.pem
+*.ipynb
app.py CHANGED
@@ -6,36 +6,25 @@ import os
 import random
 import threading
 
-import aisuite as ai
 import gradio as gr
 import pandas as pd
 
 from huggingface_hub import upload_file, hf_hub_download, HfFolder, HfApi
 from datetime import datetime
 from gradio_leaderboard import Leaderboard
+from openai import OpenAI
 
 # Load environment variables
 dotenv.load_dotenv()
 
-# Retrieve the secret from the environment
-gcp_credentials = os.environ.get("GCP_CREDENTIALS")
-
-# Write it to a file
-credentials_path = (
-    "/tmp/gcp_credentials.json"  # Ensure this path is secure and temporary
-)
-with open(credentials_path, "w") as f:
-    f.write(gcp_credentials)
-
-# Set the environment variable for GCP SDKs
-os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path
-
-# Timeout in seconds for model response
+# Initialize OpenAI Client
+api_key = os.getenv("API_KEY")
+base_url = "https://api.pandalla.ai/v1"
+openai_client = OpenAI(api_key=api_key, base_url=base_url)
+
+# Timeout in seconds for model responses
 TIMEOUT = 60
 
-# Initialize AISuite Client
-client = ai.Client()
-
 # Hint string constant
 SHOW_HINT_STRING = True  # Set to False to hide the hint string altogether
 HINT_STRING = "Once signed in, your votes will be recorded securely."
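
This hunk replaces the aisuite client and the GCP credential bootstrap with a single OpenAI-compatible gateway, so every model in context_window.json is reached through one chat-completions interface. A minimal sketch of what that setup amounts to in isolation; only API_KEY and the base URL come from this commit, and the model in the smoke test is just one of the entries added below:

import os

from openai import OpenAI

# Same initialization as in the diff: key from the environment,
# requests routed to the OpenAI-compatible pandalla.ai gateway.
client = OpenAI(api_key=os.getenv("API_KEY"), base_url="https://api.pandalla.ai/v1")

# Hypothetical smoke test; "gpt-4o-mini" is simply one of the models
# listed in the new context_window.json.
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "ping"}],
)
print(response.choices[0].message.content)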
@@ -75,7 +64,9 @@ def truncate_prompt(user_input, model_alias, models, conversation_state):
 
     # Get the full conversation history for the model
     history = conversation_state.get(model_name, [])
-    full_conversation = [{"role": msg["role"], "content": msg["content"]} for msg in history]
+    full_conversation = [
+        {"role": msg["role"], "content": msg["content"]} for msg in history
+    ]
     full_conversation.append({"role": "user", "content": user_input})
 
     # Convert to JSON string for accurate length measurement
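
The comprehension copies each stored turn before appending the new user message, and the context line notes that length is measured on the JSON serialization, since raw character counts of the dicts would miss quoting and separators. The truncation body itself is outside this diff, but a sketch of that measure-then-trim idea, assuming the budget is expressed in characters, could look like:

import json

def trim_to_budget(conversation, budget_chars):
    # Drop the oldest turns until the JSON-serialized conversation
    # fits the budget; always keep the newest message.
    while len(json.dumps(conversation)) > budget_chars and len(conversation) > 1:
        conversation.pop(0)
    return conversation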
@@ -100,7 +91,9 @@ def chat_with_models(
     user_input, model_alias, models, conversation_state, timeout=TIMEOUT
 ):
     model_name = models[model_alias]
-    truncated_input = truncate_prompt(user_input, model_alias, models, conversation_state)
+    truncated_input = truncate_prompt(
+        user_input, model_alias, models, conversation_state
+    )
     conversation_state.setdefault(model_name, []).append(
         {"role": "user", "content": user_input}
    )
@@ -110,10 +103,12 @@ def chat_with_models(
 
     def request_model_response():
         try:
-            response = client.chat.completions.create(
-                model=model_name,
-                messages=truncated_input,
-            )
+            request_params = {
+                "model": model_name,
+                "messages": truncated_input,
+                "temperature": 0,
+            }
+            response = openai_client.chat.completions.create(**request_params)
             model_response["content"] = response.choices[0].message.content
         except Exception as e:
             model_response["error"] = f"{model_name} model is not available. Error: {e}"
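
Packing the arguments into request_params and unpacking with ** keeps the call site fixed if optional knobs are added later, and temperature=0 pins sampling so the arena compares models on their most deterministic output. A sketch of that extension point; max_tokens is an assumed example, not something this commit sets:

def build_request_params(model_name, messages, **extra):
    # Base parameters mirroring the diff; temperature=0 for
    # deterministic, comparable responses.
    params = {"model": model_name, "messages": messages, "temperature": 0}
    params.update(extra)  # e.g. max_tokens=1024 -- hypothetical, not in the commit
    return params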
@@ -128,15 +123,12 @@ def chat_with_models(
     response_event_occurred = response_event.wait(timeout)
 
     if not response_event_occurred:
-        # Timeout occurred, raise a TimeoutError to be handled in the Gradio interface
         raise TimeoutError(
             f"The {model_alias} model did not respond within {timeout} seconds."
         )
     elif model_response["error"]:
-        # An error occurred during model response
         raise Exception(model_response["error"])
     else:
-        # Successful response
         formatted_response = f"```\n{model_response['content']}\n```"
         conversation_state[model_name].append(
             {"role": "assistant", "content": model_response["content"]}
 
context_window.json CHANGED
@@ -1,23 +1,33 @@
 {
-    "anthropic:claude-3-5-sonnet-latest": 200000,
-    "anthropic:claude-3-5-haiku-latest": 200000,
-    "anthropic:claude-3-sonnet-20240229": 200000,
-    "anthropic:claude-3-haiku-20240307": 200000,
-    "anthropic:claude-3-opus-latest": 200000,
-    "google:gemini-1.5-flash": 1048576,
-    "google:gemini-1.5-pro": 2097152,
-    "groq:gemma2-9b-it": 8192,
-    "groq:gemma-7b-it": 8192,
-    "groq:llama-3.1-8b-instant": 128000,
-    "groq:llama-3.1-70b-versatile": 128000,
-    "groq:llama-3.2-1b-preview": 128000,
-    "groq:llama-3.2-3b-preview": 128000,
-    "openai:gpt-3.5-turbo": 16385,
-    "openai:gpt-4": 8192,
-    "openai:gpt-4-turbo": 128000,
-    "openai:gpt-4o": 128000,
-    "openai:chatgpt-4o-latest": 128000,
-    "openai:gpt-4o-mini": 128000,
-    "openai:o1-preview": 128000,
-    "openai:o1-mini": 128000
+    "gpt-3.5-turbo": 16000,
+    "gpt-3.5-turbo-16k": 16000,
+    "gpt-3.5-turbo-instruct": 16000,
+    "gpt-4": 8192,
+    "gpt-4-32k": 32000,
+    "gpt-4-turbo": 128000,
+    "gpt-4o": 128000,
+    "gpt-4o-mini": 128000,
+    "chatgpt-4o-latest": 128000,
+    "claude-3-5-sonnet-latest": 200000,
+    "deepseek-chat": 64000,
+    "gemini-1.5-flash-latest": 1048576,
+    "gemini-1.5-pro-latest": 2097152,
+    "Hunyuan-A52B-Instruct": 128000,
+    "llama-3-70b": 128000,
+    "llama-3.1-405b": 128000,
+    "llama-3.1-70b": 128000,
+    "llama-3.1-8b": 128000,
+    "llama-3.3-70b": 128000,
+    "llama-v3.2-3b": 128000,
+    "o1-all": 128000,
+    "o1-mini-all": 128000,
+    "o1-preview-all": 128000,
+    "Qwen2-72B-Instruct": 131072,
+    "Qwen2.5-32B-Instruct": 131072,
+    "qwen2.5-72b": 32768,
+    "Qwen2.5-72B-Instruct": 131072,
+    "Qwen2.5-72B-Instruct-128k": 131072,
+    "Qwen2.5-Coder-32B-Instruct": 131072,
+    "QwQ-32B-Preview": 32768,
+    "yi-large": 32000
 }
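
The values are context-window sizes for the renamed, gateway-style model identifiers that replace the old provider-prefixed keys. app.py can consult this table when truncating prompts; note the limits are in tokens while the truncation above measures JSON characters, so any conversion is a heuristic. A minimal sketch of loading and using the table; the 4-characters-per-token ratio is an assumption, not something the commit encodes:

import json

with open("context_window.json") as f:
    context_window = json.load(f)

def char_budget(model_name, chars_per_token=4):
    # Heuristic token-to-character conversion; the ratio is assumed.
    return context_window[model_name] * chars_per_token

print(char_budget("gpt-4o"))  # 512000 characters under the heuristic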