Senkaro committed on
Commit
0340792
1 Parent(s): f9171a8

Upload bot_markdown.py

Browse files
Files changed (1) hide show
  1. bot_markdown.py +50 -0
bot_markdown.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from telethon.extensions import markdown
2
+ from telethon import types
3
+ import re
4
+
5
+
6
def extract_code(text):
    """Strip language tags from fenced code blocks and collect them in order.

    Every ``` ... ``` span in *text* is visited once, left to right. When the
    span starts with a tag line (characters from ``[\\w+#]`` followed by a
    newline) whose first character is lowercase, the tag line is removed from
    the text and the lower-cased tag is recorded. Any other span is left
    untouched and recorded as ``''``.

    Args:
        text: Message text that may contain triple-backtick code blocks.

    Returns:
        A ``(text, languages)`` tuple: the text with recognised tag lines
        removed, and a list holding exactly one entry per code block, in the
        order the blocks appear.
    """
    block_re = re.compile(r'```(.+?)```', flags=re.DOTALL)
    tagged_re = re.compile(r'([\w+#]*)\n(.+)', flags=re.DOTALL)
    languages = []

    def strip_tag(block):
        tagged = tagged_re.fullmatch(block.group(1))
        # Only a lowercase-initial tag counts as a language; this keeps a
        # capitalised first line of plain preformatted text intact.
        if tagged and tagged.group(1)[:1].islower():
            languages.append(tagged.group(1).lower())
            # Drop only the tag line. The code body is kept verbatim so that
            # its indentation and any occurrence of the tag inside the code
            # survive.
            return f'```{tagged.group(2)}```'
        languages.append('')
        return block.group(0)

    # A single substitution pass keeps `languages` aligned with block order.
    return block_re.sub(strip_tag, text), languages
23
+
24
+
25
def parse_footnotes(text):
    """Replace ``[^ref^]`` footnote markers with superscript characters.

    Each marker of the form ``[^<word chars>^]`` is replaced by a single
    space followed by the marker's content, with ASCII digits and letters
    mapped to superscript look-alikes. Characters without a mapping (such as
    ``_``) are kept unchanged.

    Args:
        text: Text that may contain footnote markers.

    Returns:
        The text with every footnote marker rewritten.
    """
    table = str.maketrans(
        "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
        "⁰¹²³⁴⁵⁶⁷⁸⁹ᵃᵇᶜᵈᵉᶠᵍʰᶦʲᵏˡᵐⁿᵒᵖᵠʳˢᵗᵘᵛʷˣʸᶻᴬᴮᶜᴰᴱᶠᴳᴴᴵᴶᴷᴸᴹᴺᴼᴾᵠᴿˢᵀᵁⱽᵂˣʸᶻ",
    )
    return re.sub(
        r"\[\^(\w+)\^\]",
        lambda match: f" {match.group(1).translate(table)}",
        text,
    )
31
+
32
+
33
class SydMarkdown:
    """Markdown parser wrapper that adds footnotes and code-block languages.

    Exposes ``parse`` / ``unparse`` static methods that delegate to
    ``telethon.extensions.markdown`` (presumably so the class can be used as
    a Telethon parse mode — confirm against the caller).
    """

    @staticmethod
    def parse(text):
        """Parse markdown *text* into plain text plus message entities.

        Footnote markers are rewritten first, then language tags are pulled
        out of fenced code blocks, and finally the text is handed to
        ``markdown.parse``. Each resulting ``MessageEntityPre`` is paired, in
        order, with the language collected for its block; entities whose
        language is non-empty are replaced by a copy carrying that language.

        Args:
            text: The markdown source text.

        Returns:
            A ``(text, entities)`` tuple as produced by ``markdown.parse``,
            with languages filled in on code-block entities.
        """
        text = parse_footnotes(text)
        text, languages = extract_code(text)
        text, entities = markdown.parse(text)

        pending = iter(languages)
        for i, entity in enumerate(entities):
            if not isinstance(entity, types.MessageEntityPre):
                continue
            # Fall back to '' when the parser reports more code blocks than
            # were collected, instead of raising IndexError.
            if language := next(pending, ''):
                entities[i] = types.MessageEntityPre(
                    entity.offset, entity.length, language)
        return text, entities

    @staticmethod
    def unparse(text, entities):
        """Convert *text* and *entities* back to markdown via ``markdown.unparse``."""
        return markdown.unparse(text, entities)