import markdown import re import os import math from textwrap import dedent from functools import lru_cache from pymdownx.superfences import fence_div_format, fence_code_format from latex2mathml.converter import convert as tex2mathml from shared_utils.config_loader import get_conf as get_conf pj = os.path.join default_user_name = 'default_user' markdown_extension_configs = { 'mdx_math': { 'enable_dollar_delimiter': True, 'use_gitlab_delimiters': False, }, } code_highlight_configs = { "pymdownx.superfences": { 'css_class': 'codehilite', "custom_fences": [ { 'name': 'mermaid', 'class': 'mermaid', 'format': fence_code_format } ] }, "pymdownx.highlight": { 'css_class': 'codehilite', 'guess_lang': True, # 'auto_title': True, # 'linenums': True } } def text_divide_paragraph(text): """ 将文本按照段落分隔符分割开,生成带有段落标签的HTML代码。 """ pre = '
.*?)(?<=\n) # the code block
(?P=fence)[ ]*$ # closing fence
'''),
re.MULTILINE | re.DOTALL | re.VERBOSE
)
def get_line_range(re_match_obj, txt):
    """
    Translate the span of a regex match into a range of line indices of ``txt``.

    Args:
        re_match_obj: a match object whose overall span indexes into ``txt``.
        txt: the text the span positions refer to.

    Returns:
        A tuple ``(line_start, line_end)``. ``line_start`` is the number of
        newline characters in ``txt`` up to and including the first matched
        character. ``line_end`` is ``line_start`` plus the number of newlines
        inside the match plus one, so ``range(line_start, line_end)`` walks the
        entries of ``txt.split("\\n")`` touched by the match.
    """
    first_char, past_last = re_match_obj.span()
    # The "+ 1" keeps the first matched character inside the counted prefix.
    first_line = txt.count('\n', 0, first_char + 1)
    newlines_inside = txt.count('\n', first_char, past_last)
    return first_line, first_line + newlines_inside + 1
def fix_code_segment_indent(txt):
    """
    Re-indent fenced code blocks whose common indentation is one short of a
    multiple of four (3, 7, 11, ...), rounding it up to that multiple.

    Every match of ``FENCED_BLOCK_RE`` in ``txt`` is inspected. The common
    indentation of a block is the smallest amount of leading whitespace among
    its non-blank lines (fence lines included). Blank and whitespace-only
    lines are ignored for this measurement, so an empty line inside the code
    no longer prevents the block from being fixed.

    NOTE(review): presumably this targets fences nested in list items that were
    indented with 3 spaces while the Markdown renderer expects 4 -- confirm.

    Args:
        txt: Markdown source text.

    Returns:
        The text with the affected blocks shifted right, or ``txt`` itself
        (the same object) when no block needed fixing.
    """
    lines = []
    changed = False
    # Working copy in which already-handled fences are blanked out so that the
    # next search moves on; its length never changes, so match offsets remain
    # valid positions in the original ``txt``.
    scratch = txt
    while True:
        match = FENCED_BLOCK_RE.search(scratch)
        if match is None:
            break
        if not lines:
            # Split lazily: texts without any fenced block never pay for it.
            lines = txt.split("\n")

        start_pos, end_pos = match.span()
        scratch = scratch[:start_pos] + ' ' * (end_pos - start_pos) + scratch[end_pos:]
        line_start, line_end = get_line_range(match, txt)
        block = range(line_start, line_end)

        # Common indentation, measured on non-blank lines only.
        shared_indent = min(
            (len(lines[i]) - len(lines[i].lstrip()) for i in block if lines[i].strip()),
            default=0,
        )
        if shared_indent % 4 != 3:
            continue

        # Shift the block right up to the next multiple of four.
        pad = ' ' * (math.ceil(shared_indent / 4) * 4 - shared_indent)
        for i in block:
            if lines[i]:  # keep truly empty lines empty (no trailing spaces)
                lines[i] = pad + lines[i]
        changed = True

    return '\n'.join(lines) if changed else txt
@lru_cache(maxsize=128) # 使用 lru缓存 加快转换速度
def markdown_convertion(txt):
"""
将Markdown格式的文本转换为HTML格式。如果包含数学公式,则先将公式转换为HTML格式。
"""
pre = ''
suf = ''
if txt.startswith(pre) and txt.endswith(suf):
# print('警告,输入了已经经过转化的字符串,二次转化可能出问题')
return txt # 已经被转化过,不需要再次转化
find_equation_pattern = r'