|
import re |
|
from string import punctuation |
|
|
|
def escape_tags_and_content(text):
    """Remove ``{code}`` and ``{noformat}`` blocks, including everything inside them.

    These tags wrap text that is not written in natural language (for
    example code snippets), so both the tags and their content are dropped.

    Args:
        text: The string to clean.

    Returns:
        ``text`` with every ``{tag...}...{tag}`` span replaced by an empty
        string. An opening tag without a matching closing tag is left as is.
    """
    no_text_tags = ("code", "noformat")
    for tag in no_text_tags:
        # Raw string keeps ``\{`` / ``\}`` as regex escapes instead of invalid
        # string escapes. The first group swallows optional tag parameters
        # (e.g. ``{code:java}``); DOTALL lets the content span several lines.
        tag_block = re.compile(r"\{%s(.*?)\}(.*?)\{%s\}" % (tag, tag), re.DOTALL)
        text = tag_block.sub("", text)
    return text
|
|
|
def escape_tags(text):
    """Remove ``color``, ``quote``, ``anchor`` and ``panel`` markup tags, keeping their content.

    Args:
        text: The string to clean.

    Returns:
        ``text`` with every ``{tag...}`` marker (opening or closing, with or
        without parameters) replaced by an empty string. The text between
        the markers is retained.
    """
    escape_tag_names = ("color", "quote", "anchor", "panel")
    for tag in escape_tag_names:
        # Raw string avoids the invalid ``\{`` / ``\}`` string escapes; the
        # lazy group covers optional parameters such as ``{color:red}``.
        text = re.sub(r"\{%s(.*?)\}" % tag, "", text)
    return text
|
|
|
def escape_strings(text):
    """Blank out escaped control sequences, quotes, backslash pairs and heading markers.

    Each of the following substrings is replaced by a single space, in the
    order listed: the literal two-character sequences backslash + ``r``,
    ``n``, ``t``, ``f``, ``v``; the double quote character; a pair of
    backslashes; and the JIRA heading markers ``h1. `` through ``h6. ``.
    """
    targets = (
        "\\r", "\\n", "\\t", "\\f", "\\v",
        "\"",
        "\\\\",
        "h1. ", "h2. ", "h3. ", "h4. ", "h5. ", "h6. ",
    )
    cleaned = text
    # Replacements are applied one after another, so the order above matters.
    for target in targets:
        cleaned = cleaned.replace(target, " ")
    return cleaned
|
|
|
def escape_links(text):
    """Remove JIRA link markup and bare web URLs from ``text``.

    A bracketed link ``[target]`` or ``[alias|target]`` is removed when its
    target starts with one of ``#``, ``^``, ``http://``, ``https://``,
    ``mailto:``, ``file:`` or ``~``. Afterwards any remaining bare
    ``http://`` or ``https://`` URL is removed up to the next whitespace.

    Args:
        text: The string to clean.

    Returns:
        ``text`` with the matched links replaced by empty strings. Brackets
        whose content does not start with a known prefix are left untouched.
    """
    link_starters = ("#", "^", "http://", "https://", "mailto:", "file:", "~")
    for link_starter in link_starters:
        # Optional "alias|" part, then the prefixed target, lazily up to the
        # closing bracket. re.escape keeps prefixes like "^" literal.
        bracketed_link = r"\[(.*?\|)?%s(.*?)\]" % re.escape(link_starter)
        text = re.sub(bracketed_link, "", text)
    # Bare URLs go last so bracketed links are removed whole, not truncated.
    return re.sub(r"\bhttps?://\S+", "", text)
|
|
|
def escape_hex_character_codes(text):
    """Remove literal ``\\xNN`` hex escape codes from ``text``.

    Characters outside the latin alphabet may appear in the text as their
    hex code representation (a backslash, ``x`` and two hex digits); each
    such code is replaced by an empty string.

    Args:
        text: The string to clean.

    Returns:
        ``text`` without ``\\xNN`` sequences. A backslash-``x`` that is not
        followed by two hexadecimal digits is left untouched.
    """
    # Only real hex digits qualify; ``\w\w`` would also eat ordinary text
    # such as the "yl" in a literal ``\xylophone``.
    return re.sub(r"\\x[0-9A-Fa-f]{2}", "", text)
|
|
|
def escape_punctuation_boundaries(text):
    """Strip punctuation from the start and end of every word, keeping a trailing period.

    The text is split on whitespace. For each word, all leading punctuation
    (periods included) is removed, and trailing punctuation is removed up to
    the first character that is a period or not punctuation.

    Args:
        text: The string to clean.

    Returns:
        The processed words joined by single spaces. A word consisting only
        of punctuation becomes an empty string but still takes part in the
        join, so it can leave two adjacent spaces behind.
    """
    # Computed once: every punctuation mark except the period.
    non_period_punctuation = punctuation.replace(".", "")
    return " ".join(
        # Strip the whole leading run in one go so mixed prefixes such as
        # ".(" are removed completely, not only up to the first period.
        word.lstrip(punctuation).rstrip(non_period_punctuation)
        for word in text.split()
    )
|
|
|
def escape_odd_spaces(text):
    """Collapse every run of whitespace into one space and trim both ends."""
    # First squeeze whitespace runs, then drop the leading/trailing remainder.
    return re.sub(r"\s+", " ", text).strip()