yangtb24 committed on
Commit
d5d4596
·
verified ·
1 Parent(s): cd222f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +392 -3
app.py CHANGED
@@ -787,13 +787,17 @@ def handsome_chat_completions():
787
  data = request.get_json()
788
  if not data or 'model' not in data:
789
  return jsonify({"error": "Invalid request data"}), 400
790
- if data['model'] not in models["text"] and data['model'] not in models["image"]:
791
- return jsonify({"error": "Invalid model"}), 400
 
 
 
792
 
793
  model_name = data['model']
 
794
 
795
  request_type = determine_request_type(
796
- model_name,
797
  models["text"] + models["image"],
798
  models["free_text"] + models["free_image"]
799
  )
@@ -815,6 +819,391 @@ def handsome_chat_completions():
815
  "Authorization": f"Bearer {api_key}",
816
  "Content-Type": "application/json"
817
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
818
 
819
  if model_name in models["image"]:
820
  if isinstance(data.get("messages"), list):
 
787
  data = request.get_json()
788
  if not data or 'model' not in data:
789
  return jsonify({"error": "Invalid request data"}), 400
790
+ if model_name not in models["text"] and model_name not in models["image"]:
791
+ if "DeepSeek-R1" in model_name and (model_name.endswith("-openwebui") or model_name.endswith("-thinking")):
792
+ pass
793
+ else:
794
+ return jsonify({"error": "Invalid model"}), 400
795
 
796
  model_name = data['model']
797
+ model_realname = model_name.replace("-thinking", "").replace("-openwebui", "")
798
 
799
  request_type = determine_request_type(
800
+ model_realname,
801
  models["text"] + models["image"],
802
  models["free_text"] + models["free_image"]
803
  )
 
819
  "Authorization": f"Bearer {api_key}",
820
  "Content-Type": "application/json"
821
  }
822
+
823
+ if "DeepSeek-R1" in model_name and ("thinking" in model_name or "openwebui" in model_name):
824
+ data['model'] = model_realname
825
+
826
+ start_time = time.time()
827
+ response = requests.post(
828
+ TEST_MODEL_ENDPOINT,
829
+ headers=headers,
830
+ json=data,
831
+ stream=data.get("stream", False),
832
+ timeout=120
833
+ )
834
+
835
+ if response.status_code == 429:
836
+ return jsonify(response.json()), 429
837
+
838
+ if data.get("stream", False):
839
+ def generate():
840
+ if model_name.endswith("-openwebui"):
841
+ first_chunk_time = None
842
+ full_response_content = ""
843
+ reasoning_content_accumulated = ""
844
+ content_accumulated = ""
845
+ first_reasoning_chunk = True
846
+
847
+ for chunk in response.iter_lines():
848
+ if chunk:
849
+ if first_chunk_time is None:
850
+ first_chunk_time = time.time()
851
+ full_response_content += chunk.decode("utf-8")
852
+
853
+ for line in chunk.decode("utf-8").splitlines():
854
+ if line.startswith("data:"):
855
+ try:
856
+ chunk_json = json.loads(line.lstrip("data: ").strip())
857
+ if "choices" in chunk_json and len(chunk_json["choices"]) > 0:
858
+ delta = chunk_json["choices"][0].get("delta", {})
859
+
860
+ if delta.get("reasoning_content") is not None:
861
+ reasoning_chunk = delta["reasoning_content"]
862
+ if first_reasoning_chunk:
863
+ think_chunk = f"<"
864
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': think_chunk}, 'index': 0}]})}\n\n"
865
+ think_chunk = f"think"
866
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': think_chunk}, 'index': 0}]})}\n\n"
867
+ think_chunk = f">\n"
868
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': think_chunk}, 'index': 0}]})}\n\n"
869
+ first_reasoning_chunk = False
870
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': reasoning_chunk}, 'index': 0}]})}\n\n"
871
+
872
+ if delta.get("content") is not None:
873
+ if not first_reasoning_chunk:
874
+ reasoning_chunk = f"\n</think>\n"
875
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': reasoning_chunk}, 'index': 0}]})}\n\n"
876
+ first_reasoning_chunk = True
877
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': delta["content"]}, 'index': 0}]})}\n\n"
878
+
879
+ except (KeyError, ValueError, json.JSONDecodeError) as e:
880
+ continue
881
+
882
+ end_time = time.time()
883
+ first_token_time = (
884
+ first_chunk_time - start_time
885
+ if first_chunk_time else 0
886
+ )
887
+ total_time = end_time - start_time
888
+
889
+ prompt_tokens = 0
890
+ completion_tokens = 0
891
+ for line in full_response_content.splitlines():
892
+ if line.startswith("data:"):
893
+ line = line[5:].strip()
894
+ if line == "[DONE]":
895
+ continue
896
+ try:
897
+ response_json = json.loads(line)
898
+
899
+ if (
900
+ "usage" in response_json and
901
+ "completion_tokens" in response_json["usage"]
902
+ ):
903
+ completion_tokens += response_json[
904
+ "usage"
905
+ ]["completion_tokens"]
906
+ if (
907
+ "usage" in response_json and
908
+ "prompt_tokens" in response_json["usage"]
909
+ ):
910
+ prompt_tokens = response_json[
911
+ "usage"
912
+ ]["prompt_tokens"]
913
+
914
+ except (
915
+ KeyError,
916
+ ValueError,
917
+ IndexError
918
+ ) as e:
919
+ logging.error(
920
+ f"解析流式响应单行 JSON 失败: {e}, "
921
+ f"行内容: {line}"
922
+ )
923
+
924
+ user_content = ""
925
+ messages = data.get("messages", [])
926
+ for message in messages:
927
+ if message["role"] == "user":
928
+ if isinstance(message["content"], str):
929
+ user_content += message["content"] + " "
930
+ elif isinstance(message["content"], list):
931
+ for item in message["content"]:
932
+ if (
933
+ isinstance(item, dict) and
934
+ item.get("type") == "text"
935
+ ):
936
+ user_content += (
937
+ item.get("text", "") +
938
+ " "
939
+ )
940
+
941
+ user_content = user_content.strip()
942
+
943
+ user_content_replaced = user_content.replace(
944
+ '\n', '\\n'
945
+ ).replace('\r', '\\n')
946
+ response_content_replaced = (f"```Thinking\n{reasoning_content_accumulated}\n```\n" if reasoning_content_accumulated else "") + content_accumulated
947
+ response_content_replaced = response_content_replaced.replace(
948
+ '\n', '\\n'
949
+ ).replace('\r', '\\n')
950
+
951
+ logging.info(
952
+ f"使用的key: {api_key}, "
953
+ f"提示token: {prompt_tokens}, "
954
+ f"输出token: {completion_tokens}, "
955
+ f"首字用时: {first_token_time:.4f}秒, "
956
+ f"总共用时: {total_time:.4f}秒, "
957
+ f"使用的模型: {model_name}, "
958
+ f"用户的内容: {user_content_replaced}, "
959
+ f"输出的内容: {response_content_replaced}"
960
+ )
961
+
962
+ with data_lock:
963
+ request_timestamps.append(time.time())
964
+ token_counts.append(prompt_tokens + completion_tokens)
965
+
966
+ yield "data: [DONE]\n\n"
967
+
968
+ return Response(
969
+ stream_with_context(generate()),
970
+ content_type="text/event-stream"
971
+ )
972
+
973
+ first_chunk_time = None
974
+ full_response_content = ""
975
+ reasoning_content_accumulated = ""
976
+ content_accumulated = ""
977
+ first_reasoning_chunk = True
978
+
979
+ for chunk in response.iter_lines():
980
+ if chunk:
981
+ if first_chunk_time is None:
982
+ first_chunk_time = time.time()
983
+ full_response_content += chunk.decode("utf-8")
984
+
985
+ for line in chunk.decode("utf-8").splitlines():
986
+ if line.startswith("data:"):
987
+ try:
988
+ chunk_json = json.loads(line.lstrip("data: ").strip())
989
+ if "choices" in chunk_json and len(chunk_json["choices"]) > 0:
990
+ delta = chunk_json["choices"][0].get("delta", {})
991
+
992
+ if delta.get("reasoning_content") is not None:
993
+ reasoning_chunk = delta["reasoning_content"]
994
+ reasoning_chunk = reasoning_chunk.replace('\n', '\n> ')
995
+ if first_reasoning_chunk:
996
+ reasoning_chunk = "> " + reasoning_chunk
997
+ first_reasoning_chunk = False
998
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': reasoning_chunk}, 'index': 0}]})}\n\n"
999
+
1000
+ if delta.get("content") is not None:
1001
+ if not first_reasoning_chunk:
1002
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': '\n\n'}, 'index': 0}]})}\n\n"
1003
+ first_reasoning_chunk = True
1004
+ yield f"data: {json.dumps({'choices': [{'delta': {'content': delta["content"]}, 'index': 0}]})}\n\n"
1005
+
1006
+ except (KeyError, ValueError, json.JSONDecodeError) as e:
1007
+ continue
1008
+
1009
+ end_time = time.time()
1010
+ first_token_time = (
1011
+ first_chunk_time - start_time
1012
+ if first_chunk_time else 0
1013
+ )
1014
+ total_time = end_time - start_time
1015
+
1016
+ prompt_tokens = 0
1017
+ completion_tokens = 0
1018
+ for line in full_response_content.splitlines():
1019
+ if line.startswith("data:"):
1020
+ line = line[5:].strip()
1021
+ if line == "[DONE]":
1022
+ continue
1023
+ try:
1024
+ response_json = json.loads(line)
1025
+
1026
+ if (
1027
+ "usage" in response_json and
1028
+ "completion_tokens" in response_json["usage"]
1029
+ ):
1030
+ completion_tokens += response_json[
1031
+ "usage"
1032
+ ]["completion_tokens"]
1033
+ if (
1034
+ "usage" in response_json and
1035
+ "prompt_tokens" in response_json["usage"]
1036
+ ):
1037
+ prompt_tokens = response_json[
1038
+ "usage"
1039
+ ]["prompt_tokens"]
1040
+
1041
+ except (
1042
+ KeyError,
1043
+ ValueError,
1044
+ IndexError
1045
+ ) as e:
1046
+ logging.error(
1047
+ f"解析流式响应单行 JSON 失败: {e}, "
1048
+ f"行内容: {line}"
1049
+ )
1050
+
1051
+ user_content = ""
1052
+ messages = data.get("messages", [])
1053
+ for message in messages:
1054
+ if message["role"] == "user":
1055
+ if isinstance(message["content"], str):
1056
+ user_content += message["content"] + " "
1057
+ elif isinstance(message["content"], list):
1058
+ for item in message["content"]:
1059
+ if (
1060
+ isinstance(item, dict) and
1061
+ item.get("type") == "text"
1062
+ ):
1063
+ user_content += (
1064
+ item.get("text", "") +
1065
+ " "
1066
+ )
1067
+
1068
+ user_content = user_content.strip()
1069
+
1070
+ user_content_replaced = user_content.replace(
1071
+ '\n', '\\n'
1072
+ ).replace('\r', '\\n')
1073
+ response_content_replaced = (f"```Thinking\n{reasoning_content_accumulated}\n```\n" if reasoning_content_accumulated else "") + content_accumulated
1074
+ response_content_replaced = response_content_replaced.replace(
1075
+ '\n', '\\n'
1076
+ ).replace('\r', '\\n')
1077
+
1078
+ logging.info(
1079
+ f"使用的key: {api_key}, "
1080
+ f"提示token: {prompt_tokens}, "
1081
+ f"输出token: {completion_tokens}, "
1082
+ f"首字用时: {first_token_time:.4f}秒, "
1083
+ f"总共用时: {total_time:.4f}秒, "
1084
+ f"使用的模型: {model_name}, "
1085
+ f"用户的内容: {user_content_replaced}, "
1086
+ f"输出的内容: {response_content_replaced}"
1087
+ )
1088
+
1089
+ with data_lock:
1090
+ request_timestamps.append(time.time())
1091
+ token_counts.append(prompt_tokens + completion_tokens)
1092
+
1093
+ yield "data: [DONE]\n\n"
1094
+
1095
+ return Response(
1096
+ stream_with_context(generate()),
1097
+ content_type="text/event-stream"
1098
+ )
1099
+ else:
1100
+ response.raise_for_status()
1101
+ end_time = time.time()
1102
+ response_json = response.json()
1103
+ total_time = end_time - start_time
1104
+
1105
+ try:
1106
+ prompt_tokens = response_json["usage"]["prompt_tokens"]
1107
+ completion_tokens = response_json["usage"]["completion_tokens"]
1108
+ response_content = ""
1109
+
1110
+ if model_name.endswith("-thinking") and "choices" in response_json and len(response_json["choices"]) > 0:
1111
+ choice = response_json["choices"][0]
1112
+ if "message" in choice:
1113
+ if "reasoning_content" in choice["message"]:
1114
+ reasoning_content = choice["message"]["reasoning_content"]
1115
+ reasoning_content = reasoning_content.replace('\n', '\n> ')
1116
+ reasoning_content = '> ' + reasoning_content
1117
+ formatted_reasoning = f"{reasoning_content}\n"
1118
+ response_content += formatted_reasoning + "\n"
1119
+ if "content" in choice["message"]:
1120
+ response_content += choice["message"]["content"]
1121
+ elif model_name.endswith("-openwebui") and "choices" in response_json and len(response_json["choices"]) > 0:
1122
+ choice = response_json["choices"][0]
1123
+ if "message" in choice:
1124
+ if "reasoning_content" in choice["message"]:
1125
+ reasoning_content = choice["message"]["reasoning_content"]
1126
+ response_content += f"<think>\n{reasoning_content}\n</think>\n"
1127
+ if "content" in choice["message"]:
1128
+ response_content += choice["message"]["content"]
1129
+
1130
+ except (KeyError, ValueError, IndexError) as e:
1131
+ logging.error(
1132
+ f"解析非流式响应 JSON 失败: {e}, "
1133
+ f"完整内容: {response_json}"
1134
+ )
1135
+ prompt_tokens = 0
1136
+ completion_tokens = 0
1137
+ response_content = ""
1138
+
1139
+ user_content = ""
1140
+ messages = data.get("messages", [])
1141
+ for message in messages:
1142
+ if message["role"] == "user":
1143
+ if isinstance(message["content"], str):
1144
+ user_content += message["content"] + " "
1145
+ elif isinstance(message["content"], list):
1146
+ for item in message["content"]:
1147
+ if (
1148
+ isinstance(item, dict) and
1149
+ item.get("type") == "text"
1150
+ ):
1151
+ user_content += (
1152
+ item.get("text", "") +
1153
+ " "
1154
+ )
1155
+
1156
+ user_content = user_content.strip()
1157
+
1158
+ user_content_replaced = user_content.replace(
1159
+ '\n', '\\n'
1160
+ ).replace('\r', '\\n')
1161
+ response_content_replaced = response_content.replace(
1162
+ '\n', '\\n'
1163
+ ).replace('\r', '\\n')
1164
+
1165
+ logging.info(
1166
+ f"使用的key: {api_key}, "
1167
+ f"提示token: {prompt_tokens}, "
1168
+ f"输出token: {completion_tokens}, "
1169
+ f"首字用时: 0, "
1170
+ f"总共用时: {total_time:.4f}秒, "
1171
+ f"使用的模型: {model_name}, "
1172
+ f"用户的内容: {user_content_replaced}, "
1173
+ f"输出的内容: {response_content_replaced}"
1174
+ )
1175
+ with data_lock:
1176
+ request_timestamps.append(time.time())
1177
+ token_counts.append(prompt_tokens + completion_tokens)
1178
+
1179
+ formatted_response = {
1180
+ "id": response_json.get("id", ""),
1181
+ "object": "chat.completion",
1182
+ "created": response_json.get("created", int(time.time())),
1183
+ "model": model_name,
1184
+ "choices": [
1185
+ {
1186
+ "index": 0,
1187
+ "message": {
1188
+ "role": "assistant",
1189
+ "content": response_content
1190
+ },
1191
+ "finish_reason": "stop"
1192
+ }
1193
+ ],
1194
+ "usage": {
1195
+ "prompt_tokens": prompt_tokens,
1196
+ "completion_tokens": completion_tokens,
1197
+ "total_tokens": prompt_tokens + completion_tokens
1198
+ }
1199
+ }
1200
+
1201
+ return jsonify(formatted_response)
1202
+
1203
+ except requests.exceptions.RequestException as e:
1204
+ logging.error(f"请求转发异常: {e}")
1205
+ return jsonify({"error": str(e)}), 500
1206
+
1207
 
1208
  if model_name in models["image"]:
1209
  if isinstance(data.get("messages"), list):