Hansimov committed on
Commit cd6b52a
1 Parent(s): 3125c87

:gem: [Feature] Enable gpt-3.5 in chat_api

apis/chat_api.py CHANGED
@@ -89,8 +89,22 @@ class ChatAPIApp:
     def chat_completions(
         self, item: ChatCompletionsPostItem, api_key: str = Depends(extract_api_key)
     ):
+        if item.model == "gpt-3.5":
+            streamer = OpenaiStreamer()
+            stream_response = streamer.chat_response(messages=item.messages)
+        else:
             streamer = HuggingfaceStreamer(model=item.model)
             composer = MessageComposer(model=item.model)
+            composer.merge(messages=item.messages)
+            stream_response = streamer.chat_response(
+                prompt=composer.merged_str,
+                temperature=item.temperature,
+                top_p=item.top_p,
+                max_new_tokens=item.max_tokens,
+                api_key=api_key,
+                use_cache=item.use_cache,
+            )
+
         if item.stream:
             event_source_response = EventSourceResponse(
                 streamer.chat_return_generator(stream_response),
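
Note: a minimal client-side sketch of how the new branch is exercised; the host, port, and API key below are assumptions for illustration, not part of this commit.

import requests

# POST an OpenAI-format request; "gpt-3.5" is routed to OpenaiStreamer,
# any other model id falls through to HuggingfaceStreamer.
response = requests.post(
    "http://127.0.0.1:23333/chat/completions",  # assumed local deployment
    headers={"Authorization": "Bearer sk-xxxx"},  # placeholder key
    json={
        "model": "gpt-3.5",
        "messages": [{"role": "user", "content": "Hello!"}],
        "stream": True,
    },
    stream=True,
)
for line in response.iter_lines():
    if line:
        print(line.decode("utf-8"))  # OpenAI-style SSE chunks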
constants/headers.py ADDED
@@ -0,0 +1,35 @@
+OPENAI_GET_HEADERS = {
+    # "Accept": "*/*",
+    "Accept-Encoding": "gzip, deflate, br, zstd",
+    "Accept-Language": "en-US,en;q=0.9",
+    "Cache-Control": "no-cache",
+    "Content-Type": "application/json",
+    # "Oai-Device-Id": self.uuid,
+    "Oai-Language": "en-US",
+    "Pragma": "no-cache",
+    "Referer": "https://chat.openai.com/",
+    "Sec-Ch-Ua": '"Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"',
+    "Sec-Ch-Ua-Mobile": "?0",
+    "Sec-Ch-Ua-Platform": '"Windows"',
+    "Sec-Fetch-Dest": "empty",
+    "Sec-Fetch-Mode": "cors",
+    "Sec-Fetch-Site": "same-origin",
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
+}
+
+
+OPENAI_POST_DATA = {
+    "action": "next",
+    # "messages": self.transform_messages(messages),
+    "parent_message_id": "",
+    "model": "text-davinci-002-render-sha",
+    "timezone_offset_min": -480,
+    "suggestions": [],
+    "history_and_training_disabled": False,
+    "conversation_mode": {"kind": "primary_assistant"},
+    "force_paragen": False,
+    "force_paragen_model_slug": "",
+    "force_nulligen": False,
+    "force_rate_limit": False,
+    # "websocket_request_id": str(uuid.uuid4()),
+}
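
Note: both constants are templates; the commented-out fields are filled in per request. A minimal sketch of the assumed usage, mirroring networks/openai_streamer.py below:

import copy
import uuid

from constants.headers import OPENAI_GET_HEADERS

# copy the shared template, then layer per-device values on top
headers = copy.deepcopy(OPENAI_GET_HEADERS)
headers["Oai-Device-Id"] = str(uuid.uuid4())  # fills the commented-out placeholder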
constants/models.py CHANGED
@@ -22,6 +22,7 @@ TOKEN_LIMIT_MAP = {
     "mistral-7b": 32768,
     "openchat-3.5": 8192,
     "gemma-7b": 8192,
+    "gpt-3.5": 8192,
 }
 
 TOKEN_RESERVED = 20
@@ -33,6 +34,7 @@ AVAILABLE_MODELS = [
     "mistral-7b",
     "openchat-3.5",
     "gemma-7b",
+    "gpt-3.5",
 ]
 
 # https://platform.openai.com/docs/api-reference/models/list
@@ -72,4 +74,11 @@ AVAILABLE_MODELS_DICTS = [
         "created": 1700000000,
         "owned_by": "Google",
     },
+    {
+        "id": "gpt-3.5",
+        "description": "[openai/gpt-3.5-turbo]: https://platform.openai.com/docs/models/gpt-3-5-turbo",
+        "object": "model",
+        "created": 1700000000,
+        "owned_by": "OpenAI",
+    },
 ]
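
Note: "gpt-3.5" is now registered in all three structures the API consults. A quick sanity check (illustrative only, not part of the commit):

from constants.models import AVAILABLE_MODELS, AVAILABLE_MODELS_DICTS, TOKEN_LIMIT_MAP

assert "gpt-3.5" in AVAILABLE_MODELS
assert TOKEN_LIMIT_MAP["gpt-3.5"] == 8192
assert any(d["id"] == "gpt-3.5" for d in AVAILABLE_MODELS_DICTS)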
messagers/message_outputer.py CHANGED
@@ -7,13 +7,13 @@ class OpenaiStreamOutputer:
     * https://platform.openai.com/docs/api-reference/chat/create
     """
 
-    def __init__(self):
+    def __init__(self, owned_by="huggingface", model="mixtral-8x7b"):
         self.default_data = {
             "created": 1700000000,
-            "id": "chatcmpl-hugginface",
+            "id": f"chatcmpl-{owned_by}",
             "object": "chat.completion.chunk",
             # "content_type": "Completions",
-            "model": "hugginface",
+            "model": model,
             "choices": [],
         }
 
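Note: the constructor is now parameterized, which also fixes the old "hugginface" typo; the defaults keep the previous Hugging Face behavior. A sketch of the two instantiations this commit uses:

from messagers.message_outputer import OpenaiStreamOutputer

hf_outputer = OpenaiStreamOutputer()  # id: "chatcmpl-huggingface", model: "mixtral-8x7b"
oai_outputer = OpenaiStreamOutputer(owned_by="openai", model="gpt-3.5")  # id: "chatcmpl-openai"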
networks/huggingface_streamer.py CHANGED
@@ -23,7 +23,7 @@ class HuggingfaceStreamer:
         else:
             self.model = "default"
         self.model_fullname = MODEL_MAP[self.model]
-        self.message_outputer = OpenaiStreamOutputer()
+        self.message_outputer = OpenaiStreamOutputer(model=self.model)
 
         if self.model == "gemma-7b":
             # this is not wrong, as repo `google/gemma-7b-it` is gated and must authenticate to access it
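
Note: with this change, stream chunks report the model actually in use instead of a hard-coded name. Illustrative check (not part of the commit):

from networks.huggingface_streamer import HuggingfaceStreamer

streamer = HuggingfaceStreamer(model="mistral-7b")
assert streamer.message_outputer.default_data["model"] == "mistral-7b"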
networks/openai_streamer.py ADDED
@@ -0,0 +1,219 @@
+import copy
+import json
+import re
+import tiktoken
+import uuid
+
+from curl_cffi import requests
+from tclogger import logger
+
+from constants.envs import PROXIES
+from constants.headers import OPENAI_GET_HEADERS, OPENAI_POST_DATA
+from constants.models import TOKEN_LIMIT_MAP, TOKEN_RESERVED
+
+from messagers.message_outputer import OpenaiStreamOutputer
+
+
+class OpenaiRequester:
+    def __init__(self):
+        self.init_requests_params()
+
+    def init_requests_params(self):
+        self.api_base = "https://chat.openai.com/backend-anon"
+        self.api_me = f"{self.api_base}/me"
+        self.api_models = f"{self.api_base}/models"
+        self.api_chat_requirements = f"{self.api_base}/sentinel/chat-requirements"
+        self.api_conversation = f"{self.api_base}/conversation"
+        self.uuid = str(uuid.uuid4())
+        self.requests_headers = copy.deepcopy(OPENAI_GET_HEADERS)
+        extra_headers = {
+            "Oai-Device-Id": self.uuid,
+        }
+        self.requests_headers.update(extra_headers)
+
+    def log_request(self, url, method="GET"):
+        logger.note(f"> {method}:", end=" ")
+        logger.mesg(f"{url}", end=" ")
+
+    def log_response(self, res: requests.Response, stream=False, verbose=False):
+        status_code = res.status_code
+        status_code_str = f"[{status_code}]"
+
+        if status_code == 200:
+            logger_func = logger.success
+        else:
+            logger_func = logger.warn
+
+        logger_func(status_code_str)
+
+        if verbose:
+            if stream:
+                if not hasattr(self, "content_offset"):
+                    self.content_offset = 0
+
+                for line in res.iter_lines():
+                    line = line.decode("utf-8")
+                    line = re.sub(r"^data:\s*", "", line)
+                    if re.match(r"^\[DONE\]", line):
+                        logger.success("\n[Finished]")
+                        break
+                    line = line.strip()
+                    if line:
+                        try:
+                            data = json.loads(line, strict=False)
+                            message_role = data["message"]["author"]["role"]
+                            message_status = data["message"]["status"]
+                            if (
+                                message_role == "assistant"
+                                and message_status == "in_progress"
+                            ):
+                                content = data["message"]["content"]["parts"][0]
+                                delta_content = content[self.content_offset :]
+                                self.content_offset = len(content)
+                                logger_func(delta_content, end="")
+                        except Exception as e:
+                            logger.warn(e)
+            else:
+                logger_func(res.json())
+
+    def get_models(self):
+        self.log_request(self.api_models)
+        res = requests.get(
+            self.api_models,
+            headers=self.requests_headers,
+            proxies=PROXIES,
+            timeout=10,
+            impersonate="chrome120",
+        )
+        self.log_response(res)
+
+    def auth(self):
+        self.log_request(self.api_chat_requirements, method="POST")
+        res = requests.post(
+            self.api_chat_requirements,
+            headers=self.requests_headers,
+            proxies=PROXIES,
+            timeout=10,
+            impersonate="chrome120",
+        )
+        self.chat_requirements_token = res.json()["token"]
+        self.log_response(res)
+
+    def transform_messages(self, messages: list[dict]):
+        def get_role(role):
+            if role in ["system", "user", "assistant"]:
+                return role
+            else:
+                return "system"
+
+        new_messages = [
+            {
+                "author": {"role": get_role(message["role"])},
+                "content": {"content_type": "text", "parts": [message["content"]]},
+                "metadata": {},
+            }
+            for message in messages
+        ]
+        return new_messages
+
+    def chat_completions(self, messages: list[dict], verbose=False):
+        extra_headers = {
+            "Accept": "text/event-stream",
+            "Openai-Sentinel-Chat-Requirements-Token": self.chat_requirements_token,
+        }
+        requests_headers = copy.deepcopy(self.requests_headers)
+        requests_headers.update(extra_headers)
+
+        post_data = copy.deepcopy(OPENAI_POST_DATA)
+        extra_data = {
+            "messages": self.transform_messages(messages),
+            "websocket_request_id": str(uuid.uuid4()),
+        }
+        post_data.update(extra_data)
+
+        self.log_request(self.api_conversation, method="POST")
+        s = requests.Session()
+        res = s.post(
+            self.api_conversation,
+            headers=requests_headers,
+            json=post_data,
+            proxies=PROXIES,
+            timeout=10,
+            impersonate="chrome120",
+            stream=True,
+        )
+        if verbose:
+            self.log_response(res, stream=True, verbose=True)
+        return res
+
+
+class OpenaiStreamer:
+    def __init__(self):
+        self.model = "gpt-3.5"
+        self.message_outputer = OpenaiStreamOutputer(owned_by="openai", model="gpt-3.5")
+        self.tokenizer = tiktoken.get_encoding("cl100k_base")
+
+    def count_tokens(self, messages: list[dict]):
+        token_count = sum(
+            len(self.tokenizer.encode(message["content"])) for message in messages
+        )
+        logger.note(f"Prompt Token Count: {token_count}")
+        return token_count
+
+    def check_token_limit(self, messages: list[dict]):
+        token_limit = TOKEN_LIMIT_MAP[self.model]
+        token_redundancy = int(
+            token_limit - TOKEN_RESERVED - self.count_tokens(messages)
+        )
+        if token_redundancy <= 0:
+            raise ValueError(f"Prompt exceeded token limit: {token_limit}")
+        return True
+
+    def chat_response(self, messages: list[dict]):
+        self.check_token_limit(messages)
+        requester = OpenaiRequester()
+        requester.auth()
+        return requester.chat_completions(messages, verbose=False)
+
+    def chat_return_generator(self, stream_response: requests.Response):
+        content_offset = 0
+        is_finished = False
+
+        for line in stream_response.iter_lines():
+            line = line.decode("utf-8")
+            line = re.sub(r"^data:\s*", "", line)
+            line = line.strip()
+
+            if not line:
+                continue
+
+            if re.match(r"^\[DONE\]", line):
+                content_type = "Finished"
+                delta_content = ""
+                logger.success("\n[Finished]")
+                is_finished = True
+            else:
+                content_type = "Completions"
+                try:
+                    data = json.loads(line, strict=False)
+                    message_role = data["message"]["author"]["role"]
+                    message_status = data["message"]["status"]
+                    if message_role == "assistant" and message_status == "in_progress":
+                        content = data["message"]["content"]["parts"][0]
+                        if not len(content):
+                            continue
+                        delta_content = content[content_offset:]
+                        content_offset = len(content)
+                        logger.success(delta_content, end="")
+                    else:
+                        continue
+                except Exception as e:
+                    logger.warn(e)
+
+            output = self.message_outputer.output(
+                content=delta_content, content_type=content_type
+            )
+            yield output
+
+        if not is_finished:
+            yield self.message_outputer.output(content="", content_type="Finished")
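
Note: a minimal end-to-end sketch of the new streamer; it assumes network access to chat.openai.com (and PROXIES configured in constants/envs.py if needed), and is illustrative rather than part of the commit:

from networks.openai_streamer import OpenaiStreamer

streamer = OpenaiStreamer()
stream_response = streamer.chat_response(
    messages=[{"role": "user", "content": "Say hello in one word."}]
)
for chunk in streamer.chat_return_generator(stream_response):
    print(chunk)  # OpenAI-format "chat.completion.chunk" payloads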