Merge remote-tracking branch 'core/master' into schema-alteration

Conflicts: django/db/models/fields/related.py
author: Andrew Godwin <andrew@aeracode.org> 2013-06-07 11:15:34 +0100
committer: Andrew Godwin <andrew@aeracode.org> 2013-06-07 11:15:34 +0100
commit: 3c296382b8dea5de7f4e1e11b66bd7cecaf2ee51 (patch)
tree: 0ca12593be82971691ffca01a836d00d3fcb3bd4 /django/utils
parent: 7609e0b42e0014a6ad0adf9dafc7018cb268070e (diff)
parent: 357d62d9f2972bf1bc21e5835c12c849143e06af (diff)
11 files changed, 119 insertions, 55 deletions
diff --git a/django/utils/_os.py b/django/utils/_os.py
index 6c1cd17a83..607e02c94d 100644
--- a/django/utils/_os.py
+++ b/django/utils/_os.py
@@ -38,7 +38,7 @@ def upath(path):
     """
     Always return a unicode path.
     """
-    if not six.PY3:
+    if not six.PY3 and not isinstance(path, six.text_type):
         return path.decode(fs_encoding)
     return path
 
diff --git a/django/utils/crypto.py b/django/utils/crypto.py
index 5d0f381ffa..15db972560 100644
--- a/django/utils/crypto.py
+++ b/django/utils/crypto.py
@@ -28,10 +28,6 @@ from django.utils import six
 from django.utils.six.moves import xrange
 
 
-_trans_5c = bytearray([(x ^ 0x5C) for x in xrange(256)])
-_trans_36 = bytearray([(x ^ 0x36) for x in xrange(256)])
-
-
 def salted_hmac(key_salt, value, secret=None):
     """
     Returns the HMAC-SHA1 of 'value', using a key generated from key_salt and a
@@ -130,9 +126,9 @@ def _fast_hmac(key, msg, digest):
     if len(key) > dig1.block_size:
         key = digest(key).digest()
     key += b'\x00' * (dig1.block_size - len(key))
-    dig1.update(key.translate(_trans_36))
+    dig1.update(key.translate(hmac.trans_36))
     dig1.update(msg)
-    dig2.update(key.translate(_trans_5c))
+    dig2.update(key.translate(hmac.trans_5C))
     dig2.update(dig1.digest())
     return dig2
 
diff --git a/django/utils/functional.py b/django/utils/functional.py
index cab74886d3..0606c775ef 100644
--- a/django/utils/functional.py
+++ b/django/utils/functional.py
@@ -4,6 +4,7 @@ from functools import wraps
 import sys
 
 from django.utils import six
+from django.utils.six.moves import copyreg
 
 
 # You can't trivially replace this with `functools.partial` because this binds
@@ -328,15 +329,23 @@ class SimpleLazyObject(LazyObject):
             self._setup()
         return self._wrapped.__dict__
 
-    # Python 3.3 will call __reduce__ when pickling; these methods are needed
-    # to serialize and deserialize correctly. They are not called in earlier
-    # versions of Python.
+    # Python 3.3 will call __reduce__ when pickling; this method is needed
+    # to serialize and deserialize correctly.
     @classmethod
     def __newobj__(cls, *args):
         return cls.__new__(cls, *args)
 
-    def __reduce__(self):
-        return (self.__newobj__, (self.__class__,), self.__getstate__())
+    def __reduce_ex__(self, proto):
+        if proto >= 2:
+            # On Py3, since the default protocol is 3, pickle uses the
+            # ``__newobj__`` method (& more efficient opcodes) for writing.
+            return (self.__newobj__, (self.__class__,), self.__getstate__())
+        else:
+            # On Py2, the default protocol is 0 (for back-compat) & the above
+            # code fails miserably (see regression test). Instead, we return
+            # exactly what's returned if there's no ``__reduce__`` method at
+            # all.
+            return (copyreg._reconstructor, (self.__class__, object, None), self.__getstate__())
 
     # Return a meaningful representation of the lazy object for debugging
     # without evaluating the wrapped object.
diff --git a/django/utils/html.py b/django/utils/html.py
index 8b28d97d13..0d28c77a61 100644
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -16,6 +16,9 @@ from django.utils.functional import allow_lazy
 from django.utils import six
 from django.utils.text import normalize_newlines
 
+from .html_parser import HTMLParser, HTMLParseError
+
+
 # Configuration for urlize() function.
 TRAILING_PUNCTUATION = ['.', ',', ':', ';', '.)']
 WRAPPING_PUNCTUATION = [('(', ')'), ('<', '>'), ('[', ']'), ('&lt;', '&gt;')]
@@ -33,7 +36,6 @@ link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+')
 html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE)
 hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL)
 trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\Z')
-strip_tags_re = re.compile(r'</?\S([^=>]*=(\s*"[^"]*"|\s*\'[^\']*\'|\S*)|[^>])*?>', re.IGNORECASE)
 
 
 def escape(text):
@@ -116,9 +118,31 @@ def linebreaks(value, autoescape=False):
     return '\n\n'.join(paras)
 linebreaks = allow_lazy(linebreaks, six.text_type)
 
+
+class MLStripper(HTMLParser):
+    def __init__(self):
+        HTMLParser.__init__(self)
+        self.reset()
+        self.fed = []
+    def handle_data(self, d):
+        self.fed.append(d)
+    def handle_entityref(self, name):
+        self.fed.append('&%s;' % name)
+    def handle_charref(self, name):
+        self.fed.append('&#%s;' % name)
+    def get_data(self):
+        return ''.join(self.fed)
+
 def strip_tags(value):
     """Returns the given HTML with all tags stripped."""
-    return strip_tags_re.sub('', force_text(value))
+    s = MLStripper()
+    try:
+        s.feed(value)
+        s.close()
+    except HTMLParseError:
+        return value
+    else:
+        return s.get_data()
 strip_tags = allow_lazy(strip_tags)
 
 def remove_tags(html, tags):
@@ -281,3 +305,10 @@ def clean_html(text):
     text = trailing_empty_content_re.sub('', text)
     return text
 clean_html = allow_lazy(clean_html, six.text_type)
+
+def avoid_wrapping(value):
+    """
+    Avoid text wrapping in the middle of a phrase by adding non-breaking
+    spaces where there previously were normal spaces.
+    """
+    return value.replace(" ", "\xa0")
diff --git a/django/utils/http.py b/django/utils/http.py
index 15fac6bfca..f4911b4ec0 100644
--- a/django/utils/http.py
+++ b/django/utils/http.py
@@ -71,7 +71,7 @@ urlunquote_plus = allow_lazy(urlunquote_plus, six.text_type)
 def urlencode(query, doseq=0):
     """
     A version of Python's urllib.urlencode() function that can operate on
-    unicode strings. The parameters are first case to UTF-8 encoded strings and
+    unicode strings. The parameters are first cast to UTF-8 encoded strings and
     then encoded as per normal.
     """
     if isinstance(query, MultiValueDict):
@@ -226,7 +226,10 @@ def same_origin(url1, url2):
     Checks if two URLs are 'same-origin'
     """
     p1, p2 = urllib_parse.urlparse(url1), urllib_parse.urlparse(url2)
-    return (p1.scheme, p1.hostname, p1.port) == (p2.scheme, p2.hostname, p2.port)
+    try:
+        return (p1.scheme, p1.hostname, p1.port) == (p2.scheme, p2.hostname, p2.port)
+    except ValueError:
+        return False
 
 def is_safe_url(url, host=None):
     """
diff --git a/django/utils/image.py b/django/utils/image.py
index 54c11adfee..d251ab9d0b 100644
--- a/django/utils/image.py
+++ b/django/utils/image.py
@@ -124,7 +124,7 @@ def _detect_image_library():
                 import _imaging as PIL_imaging
             except ImportError as err:
                 raise ImproperlyConfigured(
-                    _("The '_imaging' module for the PIL could not be " +
+                    _("The '_imaging' module for the PIL could not be "
                       "imported: %s" % err)
                 )
 
diff --git a/django/utils/ipv6.py b/django/utils/ipv6.py
index 8881574eaa..eaacfb4623 100644
--- a/django/utils/ipv6.py
+++ b/django/utils/ipv6.py
@@ -138,8 +138,7 @@ def _unpack_ipv4(ip_str):
     if not ip_str.lower().startswith('0000:0000:0000:0000:0000:ffff:'):
         return None
 
-    hextets = ip_str.split(':')
-    return hextets[-1]
+    return ip_str.rsplit(':', 1)[1]
 
 def is_valid_ipv6_address(ip_str):
     """
diff --git a/django/utils/log.py b/django/utils/log.py
index a9b62caae1..6734a7261e 100644
--- a/django/utils/log.py
+++ b/django/utils/log.py
@@ -63,6 +63,11 @@ DEFAULT_LOGGING = {
             'level': 'ERROR',
             'propagate': False,
         },
+        'django.security': {
+            'handlers': ['mail_admins'],
+            'level': 'ERROR',
+            'propagate': False,
+        },
         'py.warnings': {
             'handlers': ['console'],
         },
@@ -87,8 +92,8 @@ class AdminEmailHandler(logging.Handler):
             request = record.request
             subject = '%s (%s IP): %s' % (
                 record.levelname,
-                (request.META.get('REMOTE_ADDR') in settings.INTERNAL_IPS
-                 and 'internal' or 'EXTERNAL'),
+                ('internal' if request.META.get('REMOTE_ADDR') in settings.INTERNAL_IPS
+                 else 'EXTERNAL'),
                 record.getMessage()
             )
             filter = get_exception_reporter_filter(request)
diff --git a/django/utils/safestring.py b/django/utils/safestring.py
index 07e0bf4cea..3774012d32 100644
--- a/django/utils/safestring.py
+++ b/django/utils/safestring.py
@@ -4,7 +4,7 @@ without further escaping in HTML. Marking something as a "safe string" means
 that the producer of the string has already turned characters that should not
 be interpreted by the HTML engine (e.g. '<') into the appropriate entities.
 """
-from django.utils.functional import curry, Promise
+from django.utils.functional import curry, Promise, allow_lazy
 from django.utils import six
 
 class EscapeData(object):
@@ -14,13 +14,13 @@ class EscapeBytes(bytes, EscapeData):
     """
     A byte string that should be HTML-escaped when output.
     """
-    pass
+    __new__ = allow_lazy(bytes.__new__, bytes)
 
 class EscapeText(six.text_type, EscapeData):
     """
     A unicode string object that should be HTML-escaped when output.
     """
-    pass
+    __new__ = allow_lazy(six.text_type.__new__, six.text_type)
 
 if six.PY3:
     EscapeString = EscapeText
@@ -37,6 +37,8 @@ class SafeBytes(bytes, SafeData):
     A bytes subclass that has been specifically marked as "safe" (requires no
     further escaping) for HTML output purposes.
     """
+    __new__ = allow_lazy(bytes.__new__, bytes)
+
     def __add__(self, rhs):
         """
         Concatenating a safe byte string with another safe byte string or safe
@@ -69,6 +71,8 @@ class SafeText(six.text_type, SafeData):
     A unicode (Python 2) / str (Python 3) subclass that has been specifically
     marked as "safe" for HTML output purposes.
     """
+    __new__ = allow_lazy(six.text_type.__new__, six.text_type)
+
     def __add__(self, rhs):
         """
         Concatenating a safe unicode string with another safe byte string or
diff --git a/django/utils/timesince.py b/django/utils/timesince.py
index d70ab2ffe1..46c387f262 100644
--- a/django/utils/timesince.py
+++ b/django/utils/timesince.py
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
 
 import datetime
 
+from django.utils.html import avoid_wrapping
 from django.utils.timezone import is_aware, utc
 from django.utils.translation import ugettext, ungettext_lazy
 
@@ -40,18 +41,18 @@ def timesince(d, now=None, reversed=False):
     since = delta.days * 24 * 60 * 60 + delta.seconds
     if since <= 0:
         # d is in the future compared to now, stop processing.
-        return ugettext('0 minutes')
+        return avoid_wrapping(ugettext('0 minutes'))
     for i, (seconds, name) in enumerate(chunks):
         count = since // seconds
         if count != 0:
             break
-    result = name % count
+    result = avoid_wrapping(name % count)
     if i + 1 < len(chunks):
         # Now get the second item
         seconds2, name2 = chunks[i + 1]
         count2 = (since - (seconds * count)) // seconds2
         if count2 != 0:
-            result += ugettext(', ') + name2 % count2
+            result += ugettext(', ') + avoid_wrapping(name2 % count2)
     return result
 
 def timeuntil(d, now=None):
diff --git a/django/utils/translation/trans_real.py b/django/utils/translation/trans_real.py
index 07353c35ee..195badfc00 100644
--- a/django/utils/translation/trans_real.py
+++ b/django/utils/translation/trans_real.py
@@ -10,7 +10,9 @@ from threading import local
 import warnings
 
 from django.utils.importlib import import_module
+from django.utils.datastructures import SortedDict
 from django.utils.encoding import force_str, force_text
+from django.utils.functional import memoize
 from django.utils._os import upath
 from django.utils.safestring import mark_safe, SafeData
 from django.utils import six
@@ -29,6 +31,7 @@ _default = None
 # This is a cache for normalized accept-header languages to prevent multiple
 # file lookups when checking the same locale on repeated requests.
 _accepted = {}
+_checked_languages = {}
 
 # magic gettext number to separate context from message
 CONTEXT_SEPARATOR = "\x04"
@@ -77,7 +80,6 @@ class DjangoTranslation(gettext_module.GNUTranslations):
     def __init__(self, *args, **kw):
         gettext_module.GNUTranslations.__init__(self, *args, **kw)
         self.set_output_charset('utf-8')
-        self.django_output_charset = 'utf-8'
         self.__language = '??'
 
     def merge(self, other):
@@ -140,7 +142,7 @@ def translation(language):
         # doesn't affect en-gb), even though they will both use the core "en"
         # translation. So we have to subvert Python's internal gettext caching.
         base_lang = lambda x: x.split('-', 1)[0]
-        if base_lang(lang) in [base_lang(trans) for trans in _translations]:
+        if base_lang(lang) in [base_lang(trans) for trans in list(_translations)]:
             res._info = res._info.copy()
             res._catalog = res._catalog.copy()
 
@@ -355,34 +357,54 @@ def check_for_language(lang_code):
         if gettext_module.find('django', path, [to_locale(lang_code)]) is not None:
             return True
     return False
+check_for_language = memoize(check_for_language, _checked_languages, 1)
 
-def get_supported_language_variant(lang_code, supported=None):
+def get_supported_language_variant(lang_code, supported=None, strict=False):
     """
     Returns the language-code that's listed in supported languages, possibly
     selecting a more generic variant. Raises LookupError if nothing found.
+
+    If `strict` is False (the default), the function will look for an alternative
+    country-specific variant when the currently checked is not found.
     """
     if supported is None:
         from django.conf import settings
-        supported = dict(settings.LANGUAGES)
-    if lang_code and lang_code not in supported:
-        lang_code = lang_code.split('-')[0] # e.g. if fr-ca is not supported fallback to fr
-    if lang_code and lang_code in supported and check_for_language(lang_code):
-        return lang_code
+        supported = SortedDict(settings.LANGUAGES)
+    if lang_code:
+        # if fr-CA is not supported, try fr-ca; if that fails, fallback to fr.
+        generic_lang_code = lang_code.split('-')[0]
+        variants = (lang_code, lang_code.lower(), generic_lang_code,
+                    generic_lang_code.lower())
+        for code in variants:
+            if code in supported and check_for_language(code):
+                return code
+        if not strict:
+            # if fr-fr is not supported, try fr-ca.
+            for supported_code in supported:
+                if supported_code.startswith((generic_lang_code + '-',
+                                              generic_lang_code.lower() + '-')):
+                    return supported_code
     raise LookupError(lang_code)
 
-def get_language_from_path(path, supported=None):
+def get_language_from_path(path, supported=None, strict=False):
     """
     Returns the language-code if there is a valid language-code
     found in the `path`.
+
+    If `strict` is False (the default), the function will look for an alternative
+    country-specific variant when the currently checked is not found.
     """
     if supported is None:
         from django.conf import settings
-        supported = dict(settings.LANGUAGES)
+        supported = SortedDict(settings.LANGUAGES)
     regex_match = language_code_prefix_re.match(path)
-    if regex_match:
-        lang_code = regex_match.group(1)
-        if lang_code in supported and check_for_language(lang_code):
-            return lang_code
+    if not regex_match:
+        return None
+    lang_code = regex_match.group(1)
+    try:
+        return get_supported_language_variant(lang_code, supported, strict=strict)
+    except LookupError:
+        return None
 
 def get_language_from_request(request, check_path=False):
     """
@@ -396,7 +418,7 @@ def get_language_from_request(request, check_path=False):
     """
     global _accepted
     from django.conf import settings
-    supported = dict(settings.LANGUAGES)
+    supported = SortedDict(settings.LANGUAGES)
 
     if check_path:
         lang_code = get_language_from_path(request.path_info, supported)
@@ -420,11 +442,6 @@ def get_language_from_request(request, check_path=False):
         if accept_lang == '*':
             break
 
-        # We have a very restricted form for our language files (no encoding
-        # specifier, since they all must be UTF-8 and only one possible
-        # language each time. So we avoid the overhead of gettext.find() and
-        # work out the MO file manually.
-
         # 'normalized' is the root name of the locale in POSIX format (which is
         # the format used for the directories holding the MO files).
         normalized = locale.locale_alias.get(to_locale(accept_lang, True))
@@ -438,14 +455,13 @@ def get_language_from_request(request, check_path=False):
             # need to check again.
             return _accepted[normalized]
 
-        for lang, dirname in ((accept_lang, normalized),
-                (accept_lang.split('-')[0], normalized.split('_')[0])):
-            if lang.lower() not in supported:
-                continue
-            for path in all_locale_paths():
-                if os.path.exists(os.path.join(path, dirname, 'LC_MESSAGES', 'django.mo')):
-                    _accepted[normalized] = lang
-                    return lang
+        try:
+            accept_lang = get_supported_language_variant(accept_lang, supported)
+        except LookupError:
+            continue
+        else:
+            _accepted[normalized] = accept_lang
+            return accept_lang
 
     try:
         return get_supported_language_variant(settings.LANGUAGE_CODE, supported)
author	Andrew Godwin <andrew@aeracode.org>	2013-06-07 11:15:34 +0100
committer	Andrew Godwin <andrew@aeracode.org>	2013-06-07 11:15:34 +0100
commit	3c296382b8dea5de7f4e1e11b66bd7cecaf2ee51 (patch)
tree	0ca12593be82971691ffca01a836d00d3fcb3bd4 /django/utils
parent	7609e0b42e0014a6ad0adf9dafc7018cb268070e (diff)
parent	357d62d9f2972bf1bc21e5835c12c849143e06af (diff)