# -*- coding:utf-8 -*- from __future__ import annotations from typing import TYPE_CHECKING, Any, Callable, Dict, List, Tuple, Type import logging import commentjson as json import os import datetime import csv import requests import re import html import hashlib import gradio as gr from pypinyin import lazy_pinyin import tiktoken from markdown import markdown from pygments import highlight from pygments.lexers import get_lexer_by_name from pygments.formatters import HtmlFormatter import pandas as pd from modules.presets import * from . import shared from modules.config import retrieve_proxy, hide_history_when_not_logged_in if TYPE_CHECKING: from typing import TypedDict class DataframeData(TypedDict): headers: List[str] data: List[List[str | int | bool]] def predict(current_model, *args): iter = current_model.predict(*args) for i in iter: yield i def billing_info(current_model): return current_model.billing_info() def set_key(current_model, *args): return current_model.set_key(*args) def load_chat_history(current_model, *args): return current_model.load_chat_history(*args) def delete_chat_history(current_model, *args): return current_model.delete_chat_history(*args) def interrupt(current_model, *args): return current_model.interrupt(*args) def reset(current_model, *args): return current_model.reset(*args) def retry(current_model, *args): iter = current_model.retry(*args) for i in iter: yield i def delete_first_conversation(current_model, *args): return current_model.delete_first_conversation(*args) def delete_last_conversation(current_model, *args): return current_model.delete_last_conversation(*args) def set_system_prompt(current_model, *args): return current_model.set_system_prompt(*args) def rename_chat_history(current_model, *args): return current_model.rename_chat_history(*args) def auto_name_chat_history(current_model, *args): return current_model.auto_name_chat_history(*args) def export_markdown(current_model, *args): return current_model.export_markdown(*args) def upload_chat_history(current_model, *args): return current_model.load_chat_history(*args) def set_token_upper_limit(current_model, *args): return current_model.set_token_upper_limit(*args) def set_temperature(current_model, *args): current_model.set_temperature(*args) def set_top_p(current_model, *args): current_model.set_top_p(*args) def set_n_choices(current_model, *args): current_model.set_n_choices(*args) def set_stop_sequence(current_model, *args): current_model.set_stop_sequence(*args) def set_max_tokens(current_model, *args): current_model.set_max_tokens(*args) def set_presence_penalty(current_model, *args): current_model.set_presence_penalty(*args) def set_frequency_penalty(current_model, *args): current_model.set_frequency_penalty(*args) def set_logit_bias(current_model, *args): current_model.set_logit_bias(*args) def set_user_identifier(current_model, *args): current_model.set_user_identifier(*args) def set_single_turn(current_model, *args): current_model.set_single_turn(*args) def handle_file_upload(current_model, *args): return current_model.handle_file_upload(*args) def handle_summarize_index(current_model, *args): return current_model.summarize_index(*args) def like(current_model, *args): return current_model.like(*args) def dislike(current_model, *args): return current_model.dislike(*args) def count_token(input_str): encoding = tiktoken.get_encoding("cl100k_base") if type(input_str) == dict: input_str = f"role: {input_str['role']}, content: {input_str['content']}" length = len(encoding.encode(input_str)) return length def markdown_to_html_with_syntax_highlight(md_str): # deprecated def replacer(match): lang = match.group(1) or "text" code = match.group(2) try: lexer = get_lexer_by_name(lang, stripall=True) except ValueError: lexer = get_lexer_by_name("text", stripall=True) formatter = HtmlFormatter() highlighted_code = highlight(code, lexer, formatter) return f'
{highlighted_code}
'
code_block_pattern = r"```(\w+)?\n([\s\S]+?)\n```"
md_str = re.sub(code_block_pattern, replacer, md_str, flags=re.MULTILINE)
html_str = markdown(md_str)
return html_str
def normalize_markdown(md_text: str) -> str: # deprecated
lines = md_text.split("\n")
normalized_lines = []
inside_list = False
for i, line in enumerate(lines):
if re.match(r"^(\d+\.|-|\*|\+)\s", line.strip()):
if not inside_list and i > 0 and lines[i - 1].strip() != "":
normalized_lines.append("")
inside_list = True
normalized_lines.append(line)
elif inside_list and line.strip() == "":
if i < len(lines) - 1 and not re.match(
r"^(\d+\.|-|\*|\+)\s", lines[i + 1].strip()
):
normalized_lines.append(line)
continue
else:
inside_list = False
normalized_lines.append(line)
return "\n".join(normalized_lines)
def convert_mdtext(md_text): # deprecated
code_block_pattern = re.compile(r"```(.*?)(?:```|$)", re.DOTALL)
inline_code_pattern = re.compile(r"`(.*?)`", re.DOTALL)
code_blocks = code_block_pattern.findall(md_text)
non_code_parts = code_block_pattern.split(md_text)[::2]
result = []
raw = f' '
for non_code, code in zip(non_code_parts, code_blocks + [""]):
if non_code.strip():
non_code = normalize_markdown(non_code)
result.append(markdown(non_code, extensions=["tables"]))
if code.strip():
# _, code = detect_language(code) # 暂时去除代码高亮功能,因为在大段代码的情况下会出现问题
# code = code.replace("\n\n", "\n") # 暂时去除代码中的空行,因为在大段代码的情况下会出现问题
code = f"\n```{code}\n\n```"
code = markdown_to_html_with_syntax_highlight(code)
result.append(code)
result = "".join(result)
output = f' '
output += raw
output += ALREADY_CONVERTED_MARK
return output
def clip_rawtext(chat_message, need_escape=True):
# first, clip hr line
hr_pattern = r'\n\n.*?<\/p>)' # agent_matches = re.findall(agent_prefix_pattern, message_clipped) agent_parts = re.split(agent_prefix_pattern, message_clipped, flags=re.DOTALL) final_message = "" for i, part in enumerate(agent_parts): if i % 2 == 0: if part != "" and part != "\n": final_message += f'
{escape_markdown(part)}' if need_escape else f'
{part}' else: final_message += part return final_message def convert_bot_before_marked(chat_message): """ 注意不能给输出加缩进, 否则会被marked解析成代码块 """ if '