diff options
| author | django-bot <ops@djangoproject.com> | 2022-02-03 20:24:19 +0100 |
|---|---|---|
| committer | Mariusz Felisiak <felisiak.mariusz@gmail.com> | 2022-02-07 20:37:05 +0100 |
| commit | 9c19aff7c7561e3a82978a272ecdaad40dda5c00 (patch) | |
| tree | f0506b668a013d0063e5fba3dbf4863b466713ba /django/utils/html.py | |
| parent | f68fa8b45dfac545cfc4111d4e52804c86db68d3 (diff) | |
Refs #33476 -- Reformatted code with Black.
Diffstat (limited to 'django/utils/html.py')
| -rw-r--r-- | django/utils/html.py | 160 |
1 files changed, 87 insertions, 73 deletions
diff --git a/django/utils/html.py b/django/utils/html.py index be9f22312e..d228e4c7bc 100644 --- a/django/utils/html.py +++ b/django/utils/html.py @@ -4,9 +4,7 @@ import html import json import re from html.parser import HTMLParser -from urllib.parse import ( - parse_qsl, quote, unquote, urlencode, urlsplit, urlunsplit, -) +from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsplit from django.utils.encoding import punycode from django.utils.functional import Promise, keep_lazy, keep_lazy_text @@ -30,22 +28,22 @@ def escape(text): _js_escapes = { - ord('\\'): '\\u005C', - ord('\''): '\\u0027', - ord('"'): '\\u0022', - ord('>'): '\\u003E', - ord('<'): '\\u003C', - ord('&'): '\\u0026', - ord('='): '\\u003D', - ord('-'): '\\u002D', - ord(';'): '\\u003B', - ord('`'): '\\u0060', - ord('\u2028'): '\\u2028', - ord('\u2029'): '\\u2029' + ord("\\"): "\\u005C", + ord("'"): "\\u0027", + ord('"'): "\\u0022", + ord(">"): "\\u003E", + ord("<"): "\\u003C", + ord("&"): "\\u0026", + ord("="): "\\u003D", + ord("-"): "\\u002D", + ord(";"): "\\u003B", + ord("`"): "\\u0060", + ord("\u2028"): "\\u2028", + ord("\u2029"): "\\u2029", } # Escape every ASCII character with a value less than 32. -_js_escapes.update((ord('%c' % z), '\\u%04X' % z) for z in range(32)) +_js_escapes.update((ord("%c" % z), "\\u%04X" % z) for z in range(32)) @keep_lazy(str, SafeString) @@ -55,9 +53,9 @@ def escapejs(value): _json_script_escapes = { - ord('>'): '\\u003E', - ord('<'): '\\u003C', - ord('&'): '\\u0026', + ord(">"): "\\u003E", + ord("<"): "\\u003C", + ord("&"): "\\u0026", } @@ -68,6 +66,7 @@ def json_script(value, element_id=None): the escaped JSON in a script tag. """ from django.core.serializers.json import DjangoJSONEncoder + json_str = json.dumps(value, cls=DjangoJSONEncoder).translate(_json_script_escapes) if element_id: template = '<script id="{}" type="application/json">{}</script>' @@ -87,7 +86,7 @@ def conditional_escape(text): """ if isinstance(text, Promise): text = str(text) - if hasattr(text, '__html__'): + if hasattr(text, "__html__"): return text.__html__() else: return escape(text) @@ -118,22 +117,23 @@ def format_html_join(sep, format_string, args_generator): format_html_join('\n', "<li>{} {}</li>", ((u.first_name, u.last_name) for u in users)) """ - return mark_safe(conditional_escape(sep).join( - format_html(format_string, *args) - for args in args_generator - )) + return mark_safe( + conditional_escape(sep).join( + format_html(format_string, *args) for args in args_generator + ) + ) @keep_lazy_text def linebreaks(value, autoescape=False): """Convert newlines into <p> and <br>s.""" value = normalize_newlines(value) - paras = re.split('\n{2,}', str(value)) + paras = re.split("\n{2,}", str(value)) if autoescape: - paras = ['<p>%s</p>' % escape(p).replace('\n', '<br>') for p in paras] + paras = ["<p>%s</p>" % escape(p).replace("\n", "<br>") for p in paras] else: - paras = ['<p>%s</p>' % p.replace('\n', '<br>') for p in paras] - return '\n\n'.join(paras) + paras = ["<p>%s</p>" % p.replace("\n", "<br>") for p in paras] + return "\n\n".join(paras) class MLStripper(HTMLParser): @@ -146,13 +146,13 @@ class MLStripper(HTMLParser): self.fed.append(d) def handle_entityref(self, name): - self.fed.append('&%s;' % name) + self.fed.append("&%s;" % name) def handle_charref(self, name): - self.fed.append('&#%s;' % name) + self.fed.append("&#%s;" % name) def get_data(self): - return ''.join(self.fed) + return "".join(self.fed) def _strip_once(value): @@ -171,9 +171,9 @@ def strip_tags(value): # Note: in typical case this loop executes _strip_once once. Loop condition # is redundant, but helps to reduce number of executions of _strip_once. value = str(value) - while '<' in value and '>' in value: + while "<" in value and ">" in value: new_value = _strip_once(value) - if value.count('<') == new_value.count('<'): + if value.count("<") == new_value.count("<"): # _strip_once wasn't able to detect more tags. break value = new_value @@ -183,17 +183,18 @@ def strip_tags(value): @keep_lazy_text def strip_spaces_between_tags(value): """Return the given HTML with spaces between tags removed.""" - return re.sub(r'>\s+<', '><', str(value)) + return re.sub(r">\s+<", "><", str(value)) def smart_urlquote(url): """Quote a URL if it isn't already quoted.""" + def unquote_quote(segment): segment = unquote(segment) # Tilde is part of RFC3986 Unreserved Characters # https://tools.ietf.org/html/rfc3986#section-2.3 # See also https://bugs.python.org/issue16285 - return quote(segment, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + '~') + return quote(segment, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + "~") # Handle IDN before quoting. try: @@ -210,8 +211,10 @@ def smart_urlquote(url): if query: # Separately unquoting key/value, so as to not mix querystring separators # included in query values. See #22267. - query_parts = [(unquote(q[0]), unquote(q[1])) - for q in parse_qsl(query, keep_blank_values=True)] + query_parts = [ + (unquote(q[0]), unquote(q[1])) + for q in parse_qsl(query, keep_blank_values=True) + ] # urlencode will take care of quoting query = urlencode(query_parts) @@ -230,17 +233,17 @@ class Urlizer: Links can have trailing punctuation (periods, commas, close-parens) and leading punctuation (opening parens) and it'll still do the right thing. """ - trailing_punctuation_chars = '.,:;!' - wrapping_punctuation = [('(', ')'), ('[', ']')] - simple_url_re = _lazy_re_compile(r'^https?://\[?\w', re.IGNORECASE) + trailing_punctuation_chars = ".,:;!" + wrapping_punctuation = [("(", ")"), ("[", "]")] + + simple_url_re = _lazy_re_compile(r"^https?://\[?\w", re.IGNORECASE) simple_url_2_re = _lazy_re_compile( - r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)($|/.*)$', - re.IGNORECASE + r"^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)($|/.*)$", re.IGNORECASE ) - word_split_re = _lazy_re_compile(r'''([\s<>"']+)''') + word_split_re = _lazy_re_compile(r"""([\s<>"']+)""") - mailto_template = 'mailto:{local}@{domain}' + mailto_template = "mailto:{local}@{domain}" url_template = '<a href="{href}"{attrs}>{url}</a>' def __call__(self, text, trim_url_limit=None, nofollow=False, autoescape=False): @@ -256,39 +259,48 @@ class Urlizer: safe_input = isinstance(text, SafeData) words = self.word_split_re.split(str(text)) - return ''.join([ - self.handle_word( - word, - safe_input=safe_input, - trim_url_limit=trim_url_limit, - nofollow=nofollow, - autoescape=autoescape, - ) for word in words - ]) + return "".join( + [ + self.handle_word( + word, + safe_input=safe_input, + trim_url_limit=trim_url_limit, + nofollow=nofollow, + autoescape=autoescape, + ) + for word in words + ] + ) def handle_word( - self, word, *, safe_input, trim_url_limit=None, nofollow=False, autoescape=False, + self, + word, + *, + safe_input, + trim_url_limit=None, + nofollow=False, + autoescape=False, ): - if '.' in word or '@' in word or ':' in word: + if "." in word or "@" in word or ":" in word: # lead: Punctuation trimmed from the beginning of the word. # middle: State of the word. # trail: Punctuation trimmed from the end of the word. lead, middle, trail = self.trim_punctuation(word) # Make URL we want to point to. url = None - nofollow_attr = ' rel="nofollow"' if nofollow else '' + nofollow_attr = ' rel="nofollow"' if nofollow else "" if self.simple_url_re.match(middle): url = smart_urlquote(html.unescape(middle)) elif self.simple_url_2_re.match(middle): - url = smart_urlquote('http://%s' % html.unescape(middle)) - elif ':' not in middle and self.is_email_simple(middle): - local, domain = middle.rsplit('@', 1) + url = smart_urlquote("http://%s" % html.unescape(middle)) + elif ":" not in middle and self.is_email_simple(middle): + local, domain = middle.rsplit("@", 1) try: domain = punycode(domain) except UnicodeError: return word url = self.mailto_template.format(local=local, domain=domain) - nofollow_attr = '' + nofollow_attr = "" # Make link. if url: trimmed = self.trim_url(middle, limit=trim_url_limit) @@ -300,7 +312,7 @@ class Urlizer: attrs=nofollow_attr, url=trimmed, ) - return mark_safe(f'{lead}{middle}{trail}') + return mark_safe(f"{lead}{middle}{trail}") else: if safe_input: return mark_safe(word) @@ -315,14 +327,14 @@ class Urlizer: def trim_url(self, x, *, limit): if limit is None or len(x) <= limit: return x - return '%s…' % x[:max(0, limit - 1)] + return "%s…" % x[: max(0, limit - 1)] def trim_punctuation(self, word): """ Trim trailing and wrapping punctuation from `word`. Return the items of the new state. """ - lead, middle, trail = '', word, '' + lead, middle, trail = "", word, "" # Continue trimming until middle remains unchanged. trimmed_something = True while trimmed_something: @@ -330,15 +342,15 @@ class Urlizer: # Trim wrapping punctuation. for opening, closing in self.wrapping_punctuation: if middle.startswith(opening): - middle = middle[len(opening):] + middle = middle[len(opening) :] lead += opening trimmed_something = True # Keep parentheses at the end only if they're balanced. if ( - middle.endswith(closing) and - middle.count(closing) == middle.count(opening) + 1 + middle.endswith(closing) + and middle.count(closing) == middle.count(opening) + 1 ): - middle = middle[:-len(closing)] + middle = middle[: -len(closing)] trail = closing + trail trimmed_something = True # Trim trailing punctuation (after trimming wrapping punctuation, @@ -357,15 +369,15 @@ class Urlizer: def is_email_simple(value): """Return True if value looks like an email address.""" # An @ must be in the middle of the value. - if '@' not in value or value.startswith('@') or value.endswith('@'): + if "@" not in value or value.startswith("@") or value.endswith("@"): return False try: - p1, p2 = value.split('@') + p1, p2 = value.split("@") except ValueError: # value contains more than one @. return False # Dot must be in p2 (e.g. example.com) - if '.' not in p2 or p2.startswith('.'): + if "." not in p2 or p2.startswith("."): return False return True @@ -375,7 +387,9 @@ urlizer = Urlizer() @keep_lazy_text def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False): - return urlizer(text, trim_url_limit=trim_url_limit, nofollow=nofollow, autoescape=autoescape) + return urlizer( + text, trim_url_limit=trim_url_limit, nofollow=nofollow, autoescape=autoescape + ) def avoid_wrapping(value): @@ -391,12 +405,12 @@ def html_safe(klass): A decorator that defines the __html__ method. This helps non-Django templates to detect classes whose __str__ methods return SafeString. """ - if '__html__' in klass.__dict__: + if "__html__" in klass.__dict__: raise ValueError( "can't apply @html_safe to %s because it defines " "__html__()." % klass.__name__ ) - if '__str__' not in klass.__dict__: + if "__str__" not in klass.__dict__: raise ValueError( "can't apply @html_safe to %s because it doesn't " "define __str__()." % klass.__name__ |
