diff options
| author | Justin Bronn <jbronn@gmail.com> | 2007-08-26 01:10:53 +0000 |
|---|---|---|
| committer | Justin Bronn <jbronn@gmail.com> | 2007-08-26 01:10:53 +0000 |
| commit | 2052b508eb92c62fc0678efd4936c5ec1e0e735b (patch) | |
| tree | e510109b74b28c8ccef5f6955727cb9dce3da655 /django/utils | |
| parent | a7297a255f4bb86f608ea251e00253d18c31d9d4 (diff) | |
gis: Made necessary modifications for unicode, manage refactor, backend refactor and merged 5584-6000 via svnmerge from [repos:django/trunk trunk].
git-svn-id: http://code.djangoproject.com/svn/django/branches/gis@6018 bcc190cf-cafb-0310-a4f2-bffc1f526a37
Diffstat (limited to 'django/utils')
| -rw-r--r-- | django/utils/_os.py | 23 | ||||
| -rw-r--r-- | django/utils/cache.py | 20 | ||||
| -rw-r--r-- | django/utils/datastructures.py | 13 | ||||
| -rw-r--r-- | django/utils/dateformat.py | 56 | ||||
| -rw-r--r-- | django/utils/dates.py | 6 | ||||
| -rw-r--r-- | django/utils/decorators.py | 26 | ||||
| -rw-r--r-- | django/utils/encoding.py | 85 | ||||
| -rw-r--r-- | django/utils/feedgenerator.py | 55 | ||||
| -rw-r--r-- | django/utils/functional.py | 67 | ||||
| -rw-r--r-- | django/utils/html.py | 80 | ||||
| -rw-r--r-- | django/utils/http.py | 38 | ||||
| -rw-r--r-- | django/utils/maxlength.py | 67 | ||||
| -rw-r--r-- | django/utils/stopwords.py | 2 | ||||
| -rw-r--r-- | django/utils/text.py | 55 | ||||
| -rw-r--r-- | django/utils/timesince.py | 24 | ||||
| -rw-r--r-- | django/utils/translation/__init__.py | 23 | ||||
| -rw-r--r-- | django/utils/translation/trans_null.py | 11 | ||||
| -rw-r--r-- | django/utils/translation/trans_real.py | 95 | ||||
| -rw-r--r-- | django/utils/tzinfo.py | 13 | ||||
| -rw-r--r-- | django/utils/version.py | 39 |
20 files changed, 603 insertions, 195 deletions
diff --git a/django/utils/_os.py b/django/utils/_os.py new file mode 100644 index 0000000000..30d7cbd444 --- /dev/null +++ b/django/utils/_os.py @@ -0,0 +1,23 @@ +from os.path import join, normcase, abspath, sep + +def safe_join(base, *paths): + """ + Joins one or more path components to the base path component intelligently. + Returns a normalized, absolute version of the final path. + + The final path must be located inside of the base path component (otherwise + a ValueError is raised). + """ + # We need to use normcase to ensure we don't false-negative on case + # insensitive operating systems (like Windows). + final_path = normcase(abspath(join(base, *paths))) + base_path = normcase(abspath(base)) + base_path_len = len(base_path) + # Ensure final_path starts with base_path and that the next character after + # the final path is os.sep (or nothing, in which case final_path must be + # equal to base_path). + if not final_path.startswith(base_path) \ + or final_path[base_path_len:base_path_len+1] not in ('', sep): + raise ValueError('the joined path is located outside of the base path' + ' component') + return final_path diff --git a/django/utils/cache.py b/django/utils/cache.py index c8031a409a..2753d86949 100644 --- a/django/utils/cache.py +++ b/django/utils/cache.py @@ -17,9 +17,13 @@ A example: i18n middleware would need to distinguish caches by the "Accept-language" header. """ -import datetime, md5, re +import md5 +import re +import time +from email.Utils import formatdate from django.conf import settings from django.core.cache import cache +from django.utils.encoding import smart_str cc_delim_re = re.compile(r'\s*,\s*') @@ -43,10 +47,10 @@ def patch_cache_control(response, **kwargs): return (t[0].lower().replace('-', '_'), True) def dictvalue(t): - if t[1] == True: + if t[1] is True: return t[0] else: - return t[0] + '=' + str(t[1]) + return t[0] + '=' + smart_str(t[1]) if response.has_header('Cache-Control'): cc = cc_delim_re.split(response['Cache-Control']) @@ -72,16 +76,14 @@ def patch_response_headers(response, cache_timeout=None): """ if cache_timeout is None: cache_timeout = settings.CACHE_MIDDLEWARE_SECONDS - now = datetime.datetime.utcnow() + if cache_timeout < 0: + cache_timeout = 0 # Can't have max-age negative if not response.has_header('ETag'): response['ETag'] = md5.new(response.content).hexdigest() if not response.has_header('Last-Modified'): - response['Last-Modified'] = now.strftime('%a, %d %b %Y %H:%M:%S GMT') + response['Last-Modified'] = formatdate()[:26] + "GMT" if not response.has_header('Expires'): - expires = now + datetime.timedelta(0, cache_timeout) - response['Expires'] = expires.strftime('%a, %d %b %Y %H:%M:%S GMT') - if cache_timeout < 0: - cache_timeout = 0 # Can't have max-age negative + response['Expires'] = formatdate(time.time() + cache_timeout)[:26] + "GMT" patch_cache_control(response, max_age=cache_timeout) def add_never_cache_headers(response): diff --git a/django/utils/datastructures.py b/django/utils/datastructures.py index 60bc0051a2..4b60d1d194 100644 --- a/django/utils/datastructures.py +++ b/django/utils/datastructures.py @@ -267,3 +267,16 @@ class DotExpandedDict(dict): current[bits[-1]] = v except TypeError: # Special-case if current isn't a dict. current = {bits[-1] : v} + +class FileDict(dict): + """ + A dictionary used to hold uploaded file contents. The only special feature + here is that repr() of this object won't dump the entire contents of the + file to the output. A handy safeguard for a large file upload. + """ + def __repr__(self): + if 'content' in self: + d = dict(self, content='<omitted>') + return dict.__repr__(d) + return dict.__repr__(self) + diff --git a/django/utils/dateformat.py b/django/utils/dateformat.py index a558e3a69f..d5f3499d82 100644 --- a/django/utils/dateformat.py +++ b/django/utils/dateformat.py @@ -11,9 +11,10 @@ Usage: >>> """ -from django.utils.dates import MONTHS, MONTHS_3, MONTHS_AP, WEEKDAYS +from django.utils.dates import MONTHS, MONTHS_3, MONTHS_AP, WEEKDAYS, WEEKDAYS_ABBR from django.utils.tzinfo import LocalTimezone -from django.utils.translation import gettext as _ +from django.utils.translation import string_concat, ugettext as _ +from django.utils.encoding import force_unicode from calendar import isleap, monthrange import re, time @@ -23,12 +24,12 @@ re_escaped = re.compile(r'\\(.)') class Formatter(object): def format(self, formatstr): pieces = [] - for i, piece in enumerate(re_formatchars.split(formatstr)): + for i, piece in enumerate(re_formatchars.split(force_unicode(formatstr))): if i % 2: - pieces.append(str(getattr(self, piece)())) + pieces.append(force_unicode(getattr(self, piece)())) elif piece: pieces.append(re_escaped.sub(r'\1', piece)) - return ''.join(pieces) + return u''.join(pieces) class TimeFormat(Formatter): def __init__(self, t): @@ -52,13 +53,14 @@ class TimeFormat(Formatter): def f(self): """ - Time, in 12-hour hours and minutes, with minutes left off if they're zero. + Time, in 12-hour hours and minutes, with minutes left off if they're + zero. Examples: '1', '1:30', '2:05', '2' Proprietary extension. """ if self.data.minute == 0: return self.g() - return '%s:%s' % (self.g(), self.i()) + return u'%s:%s' % (self.g(), self.i()) def g(self): "Hour, 12-hour format without leading zeros; i.e. '1' to '12'" @@ -74,15 +76,15 @@ class TimeFormat(Formatter): def h(self): "Hour, 12-hour format; i.e. '01' to '12'" - return '%02d' % self.g() + return u'%02d' % self.g() def H(self): "Hour, 24-hour format; i.e. '00' to '23'" - return '%02d' % self.G() + return u'%02d' % self.G() def i(self): "Minutes; i.e. '00' to '59'" - return '%02d' % self.data.minute + return u'%02d' % self.data.minute def P(self): """ @@ -95,11 +97,11 @@ class TimeFormat(Formatter): return _('midnight') if self.data.minute == 0 and self.data.hour == 12: return _('noon') - return '%s %s' % (self.f(), self.a()) + return u'%s %s' % (self.f(), self.a()) def s(self): "Seconds; i.e. '00' to '59'" - return '%02d' % self.data.second + return u'%02d' % self.data.second class DateFormat(TimeFormat): year_days = [None, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334] @@ -117,11 +119,11 @@ class DateFormat(TimeFormat): def d(self): "Day of the month, 2 digits with leading zeros; i.e. '01' to '31'" - return '%02d' % self.data.day + return u'%02d' % self.data.day def D(self): "Day of the week, textual, 3 letters; e.g. 'Fri'" - return WEEKDAYS[self.data.weekday()][0:3] + return WEEKDAYS_ABBR[self.data.weekday()] def F(self): "Month, textual, long; e.g. 'January'" @@ -130,9 +132,9 @@ class DateFormat(TimeFormat): def I(self): "'1' if Daylight Savings Time, '0' otherwise." if self.timezone.dst(self.data): - return '1' + return u'1' else: - return '0' + return u'0' def j(self): "Day of the month without leading zeros; i.e. '1' to '31'" @@ -148,7 +150,7 @@ class DateFormat(TimeFormat): def m(self): "Month; i.e. '01' to '12'" - return '%02d' % self.data.month + return u'%02d' % self.data.month def M(self): "Month, textual, 3 letters; e.g. 'Jan'" @@ -165,7 +167,7 @@ class DateFormat(TimeFormat): def O(self): "Difference to Greenwich time in hours; e.g. '+0200'" tz = self.timezone.utcoffset(self.data) - return "%+03d%02d" % (tz.seconds // 3600, (tz.seconds // 60) % 60) + return u"%+03d%02d" % (tz.seconds // 3600, (tz.seconds // 60) % 60) def r(self): "RFC 822 formatted date; e.g. 'Thu, 21 Dec 2000 16:01:07 +0200'" @@ -174,26 +176,26 @@ class DateFormat(TimeFormat): def S(self): "English ordinal suffix for the day of the month, 2 characters; i.e. 'st', 'nd', 'rd' or 'th'" if self.data.day in (11, 12, 13): # Special case - return 'th' + return u'th' last = self.data.day % 10 if last == 1: - return 'st' + return u'st' if last == 2: - return 'nd' + return u'nd' if last == 3: - return 'rd' - return 'th' + return u'rd' + return u'th' def t(self): "Number of days in the given month; i.e. '28' to '31'" - return '%02d' % monthrange(self.data.year, self.data.month)[1] + return u'%02d' % monthrange(self.data.year, self.data.month)[1] def T(self): "Time zone of this machine; e.g. 'EST' or 'MDT'" name = self.timezone.tzname(self.data) if name is None: name = self.format('O') - return name + return unicode(name) def U(self): "Seconds since the Unix epoch (January 1 1970 00:00:00 GMT)" @@ -225,14 +227,14 @@ class DateFormat(TimeFormat): week_number = 1 else: j = day_of_year + (7 - weekday) + (jan1_weekday - 1) - week_number = j / 7 + week_number = j // 7 if jan1_weekday > 4: week_number -= 1 return week_number def y(self): "Year, 2 digits; e.g. '99'" - return str(self.data.year)[2:] + return unicode(self.data.year)[2:] def Y(self): "Year, 4 digits; e.g. '1999'" diff --git a/django/utils/dates.py b/django/utils/dates.py index 111f32e4fc..4427af8ca9 100644 --- a/django/utils/dates.py +++ b/django/utils/dates.py @@ -1,11 +1,15 @@ "Commonly-used date structures" -from django.utils.translation import gettext_lazy as _ +from django.utils.translation import ugettext_lazy as _ WEEKDAYS = { 0:_('Monday'), 1:_('Tuesday'), 2:_('Wednesday'), 3:_('Thursday'), 4:_('Friday'), 5:_('Saturday'), 6:_('Sunday') } +WEEKDAYS_ABBR = { + 0:_('Mon'), 1:_('Tue'), 2:_('Wed'), 3:_('Thu'), 4:_('Fri'), + 5:_('Sat'), 6:_('Sun') +} WEEKDAYS_REV = { 'monday':0, 'tuesday':1, 'wednesday':2, 'thursday':3, 'friday':4, 'saturday':5, 'sunday':6 diff --git a/django/utils/decorators.py b/django/utils/decorators.py index 1c6cc8c7de..57ce29fca4 100644 --- a/django/utils/decorators.py +++ b/django/utils/decorators.py @@ -1,12 +1,36 @@ "Functions that help with dynamically creating decorators for views." +import types + def decorator_from_middleware(middleware_class): """ Given a middleware class (not an instance), returns a view decorator. This lets you use middleware functionality on a per-view basis. """ - def _decorator_from_middleware(view_func, *args, **kwargs): + def _decorator_from_middleware(*args, **kwargs): + # For historical reasons, these "decorators" are also called as + # dec(func, *args) instead of dec(*args)(func). We handle both forms + # for backwards compatibility. + has_func = True + try: + view_func = kwargs.pop('view_func') + except KeyError: + if len(args): + view_func, args = args[0], args[1:] + else: + has_func = False + if not (has_func and isinstance(view_func, types.FunctionType)): + # We are being called as a decorator. + if has_func: + args = (view_func,) + args + middleware = middleware_class(*args, **kwargs) + + def decorator_func(fn): + return _decorator_from_middleware(fn, *args, **kwargs) + return decorator_func + middleware = middleware_class(*args, **kwargs) + def _wrapped_view(request, *args, **kwargs): if hasattr(middleware, 'process_request'): result = middleware.process_request(request) diff --git a/django/utils/encoding.py b/django/utils/encoding.py index 4774fb0d26..2319496538 100644 --- a/django/utils/encoding.py +++ b/django/utils/encoding.py @@ -1,32 +1,83 @@ -from django.conf import settings +import types +import urllib from django.utils.functional import Promise -def smart_unicode(s): +class StrAndUnicode(object): + """ + A class whose __str__ returns its __unicode__ as a UTF-8 bytestring. + + Useful as a mix-in. + """ + def __str__(self): + return self.__unicode__().encode('utf-8') + +def smart_unicode(s, encoding='utf-8', strings_only=False, errors='strict'): + """ + Returns a unicode object representing 's'. Treats bytestrings using the + 'encoding' codec. + + If strings_only is True, don't convert (some) non-string-like objects. + """ if isinstance(s, Promise): - # The input is the result of a gettext_lazy() call, or similar. It will - # already be encoded in DEFAULT_CHARSET on evaluation and we don't want - # to evaluate it until render time. - # FIXME: This isn't totally consistent, because it eventually returns a - # bytestring rather than a unicode object. It works wherever we use - # smart_unicode() at the moment. Fixing this requires work in the - # i18n internals. + # The input is the result of a gettext_lazy() call. + return s + return force_unicode(s, encoding, strings_only, errors) + +def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'): + """ + Similar to smart_unicode, except that lazy instances are resolved to + strings, rather than kept as lazy objects. + + If strings_only is True, don't convert (some) non-string-like objects. + """ + if strings_only and isinstance(s, (types.NoneType, int)): return s if not isinstance(s, basestring,): if hasattr(s, '__unicode__'): s = unicode(s) else: - s = unicode(str(s), settings.DEFAULT_CHARSET) + s = unicode(str(s), encoding, errors) elif not isinstance(s, unicode): - s = unicode(s, settings.DEFAULT_CHARSET) + s = unicode(s, encoding, errors) return s -class StrAndUnicode(object): +def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'): """ - A class whose __str__ returns its __unicode__ as a bytestring - according to settings.DEFAULT_CHARSET. + Returns a bytestring version of 's', encoded as specified in 'encoding'. - Useful as a mix-in. + If strings_only is True, don't convert (some) non-string-like objects. """ - def __str__(self): - return self.__unicode__().encode(settings.DEFAULT_CHARSET) + if strings_only and isinstance(s, (types.NoneType, int)): + return s + if isinstance(s, Promise): + return unicode(s).encode(encoding, errors) + elif not isinstance(s, basestring): + try: + return str(s) + except UnicodeEncodeError: + return unicode(s).encode(encoding, errors) + elif isinstance(s, unicode): + return s.encode(encoding, errors) + elif s and encoding != 'utf-8': + return s.decode('utf-8', errors).encode(encoding, errors) + else: + return s + +def iri_to_uri(iri): + """ + Convert an Internationalized Resource Identifier (IRI) portion to a URI + portion that is suitable for inclusion in a URL. + + This is the algorithm from section 3.1 of RFC 3987. However, since we are + assuming input is either UTF-8 or unicode already, we can simplify things a + little from the full method. + + Returns an ASCII string containing the encoded result. + """ + # The list of safe characters here is constructed from the printable ASCII + # characters that are not explicitly excluded by the list at the end of + # section 3.1 of RFC 3987. + if iri is None: + return iri + return urllib.quote(smart_str(iri), safe='/#%[]=:;$&()+,!?') diff --git a/django/utils/feedgenerator.py b/django/utils/feedgenerator.py index 2c82e9a37a..6b6dedfbe9 100644 --- a/django/utils/feedgenerator.py +++ b/django/utils/feedgenerator.py @@ -19,6 +19,7 @@ http://diveintomark.org/archives/2004/02/04/incompatible-rss """ from django.utils.xmlutils import SimplerXMLGenerator +from django.utils.encoding import force_unicode, iri_to_uri import datetime, re, time import email.Utils @@ -34,25 +35,29 @@ def get_tag_uri(url, date): if date is not None: tag = re.sub('/', ',%s:/' % date.strftime('%Y-%m-%d'), tag, 1) tag = re.sub('#', '/', tag) - return 'tag:' + tag + return u'tag:' + tag class SyndicationFeed(object): "Base class for all syndication feeds. Subclasses should provide write()" def __init__(self, title, link, description, language=None, author_email=None, author_name=None, author_link=None, subtitle=None, categories=None, - feed_url=None, feed_copyright=None): + feed_url=None, feed_copyright=None, feed_guid=None): + to_unicode = lambda s: force_unicode(s, strings_only=True) + if categories: + categories = [force_unicode(c) for c in categories] self.feed = { - 'title': title, - 'link': link, - 'description': description, - 'language': language, - 'author_email': author_email, - 'author_name': author_name, - 'author_link': author_link, - 'subtitle': subtitle, + 'title': to_unicode(title), + 'link': iri_to_uri(link), + 'description': to_unicode(description), + 'language': force_unicode(language), + 'author_email': to_unicode(author_email), + 'author_name': to_unicode(author_name), + 'author_link': iri_to_uri(author_link), + 'subtitle': to_unicode(subtitle), 'categories': categories or (), - 'feed_url': feed_url, - 'feed_copyright': feed_copyright, + 'feed_url': iri_to_uri(feed_url), + 'feed_copyright': to_unicode(feed_copyright), + 'id': feed_guid or link, } self.items = [] @@ -64,19 +69,22 @@ class SyndicationFeed(object): objects except pubdate, which is a datetime.datetime object, and enclosure, which is an instance of the Enclosure class. """ + to_unicode = lambda s: force_unicode(s, strings_only=True) + if categories: + categories = [to_unicode(c) for c in categories] self.items.append({ - 'title': title, - 'link': link, - 'description': description, - 'author_email': author_email, - 'author_name': author_name, - 'author_link': author_link, + 'title': to_unicode(title), + 'link': iri_to_uri(link), + 'description': to_unicode(description), + 'author_email': to_unicode(author_email), + 'author_name': to_unicode(author_name), + 'author_link': iri_to_uri(author_link), 'pubdate': pubdate, - 'comments': comments, - 'unique_id': unique_id, + 'comments': to_unicode(comments), + 'unique_id': to_unicode(unique_id), 'enclosure': enclosure, 'categories': categories or (), - 'item_copyright': item_copyright, + 'item_copyright': to_unicode(item_copyright), }) def num_items(self): @@ -114,7 +122,8 @@ class Enclosure(object): "Represents an RSS enclosure" def __init__(self, url, length, mime_type): "All args are expected to be Python Unicode objects" - self.url, self.length, self.mime_type = url, length, mime_type + self.length, self.mime_type = length, mime_type + self.url = iri_to_uri(url) class RssFeed(SyndicationFeed): mime_type = 'application/rss+xml' @@ -205,7 +214,7 @@ class Atom1Feed(SyndicationFeed): handler.addQuickElement(u"link", "", {u"rel": u"alternate", u"href": self.feed['link']}) if self.feed['feed_url'] is not None: handler.addQuickElement(u"link", "", {u"rel": u"self", u"href": self.feed['feed_url']}) - handler.addQuickElement(u"id", self.feed['link']) + handler.addQuickElement(u"id", self.feed['id']) handler.addQuickElement(u"updated", rfc3339_date(self.latest_post_date()).decode('ascii')) if self.feed['author_name'] is not None: handler.startElement(u"author", {}) diff --git a/django/utils/functional.py b/django/utils/functional.py index a57546ad2d..734704f6f3 100644 --- a/django/utils/functional.py +++ b/django/utils/functional.py @@ -3,22 +3,24 @@ def curry(_curried_func, *args, **kwargs): return _curried_func(*(args+moreargs), **dict(kwargs, **morekwargs)) return _curried -def memoize(func, cache): +def memoize(func, cache, num_args): """ Wrap a function so that results for any argument tuple are stored in 'cache'. Note that the args to the function must be usable as dictionary keys. + + Only the first num_args are considered when creating the key. """ def wrapper(*args): - if args in cache: - return cache[args] - + mem_args = args[:num_args] + if mem_args in cache: + return cache[mem_args] result = func(*args) - cache[args] = result + cache[mem_args] = result return result return wrapper -class Promise: +class Promise(object): """ This is just a base class for the proxy class created in the closure of the lazy function. It can be used to recognize @@ -47,6 +49,11 @@ def lazy(func, *resultclasses): self.__dispatch[resultclass] = {} for (k, v) in resultclass.__dict__.items(): setattr(self, k, self.__promise__(resultclass, k, v)) + self._delegate_str = str in resultclasses + self._delegate_unicode = unicode in resultclasses + assert not (self._delegate_str and self._delegate_unicode), "Cannot call lazy() with both str and unicode return types." + if self._delegate_unicode: + self.__unicode__ = self.__unicode_cast def __promise__(self, klass, funcname, func): # Builds a wrapper around some magic method and registers that magic @@ -62,8 +69,56 @@ def lazy(func, *resultclasses): self.__dispatch[klass][funcname] = func return __wrapper__ + def __unicode_cast(self): + return self.__func(*self.__args, **self.__kw) + + def __str__(self): + # As __str__ is always a method on the type (class), it is looked + # up (and found) there first. So we can't just assign to it on a + # per-instance basis in __init__. + if self._delegate_str: + return str(self.__func(*self.__args, **self.__kw)) + else: + return Promise.__str__(self) + + def __cmp__(self, rhs): + if self._delegate_str: + s = str(self.__func(*self.__args, **self.__kw)) + elif self._delegate_unicode: + s = unicode(self.__func(*self.__args, **self.__kw)) + else: + s = self.__func(*self.__args, **self.__kw) + if isinstance(rhs, Promise): + return -cmp(rhs, s) + else: + return cmp(s, rhs) + + def __mod__(self, rhs): + if self._delegate_str: + return str(self) % rhs + elif self._delegate_unicode: + return unicode(self) % rhs + else: + raise AssertionError('__mod__ not supported for non-string types') + def __wrapper__(*args, **kw): # Creates the proxy object, instead of the actual value. return __proxy__(args, kw) return __wrapper__ + +def allow_lazy(func, *resultclasses): + """ + A decorator that allows a function to be called with one or more lazy + arguments. If none of the args are lazy, the function is evaluated + immediately, otherwise a __proxy__ is returned that will evaluate the + function when needed. + """ + def wrapper(*args, **kwargs): + for arg in list(args) + kwargs.values(): + if isinstance(arg, Promise): + break + else: + return func(*args, **kwargs) + return lazy(func, *resultclasses)(*args, **kwargs) + return wrapper diff --git a/django/utils/html.py b/django/utils/html.py index e1860627ce..ebd04d1b3c 100644 --- a/django/utils/html.py +++ b/django/utils/html.py @@ -1,7 +1,10 @@ "HTML utilities suitable for global use." -import re, string -from django.utils.encoding import smart_unicode +import re +import string + +from django.utils.encoding import force_unicode +from django.utils.functional import allow_lazy # Configuration for urlize() function LEADING_PUNCTUATION = ['(', '<', '<'] @@ -23,40 +26,45 @@ trailing_empty_content_re = re.compile(r'(?:<p>(?: |\s|<br \/>)*?</p>\s*)+\ del x # Temporary variable def escape(html): - "Returns the given HTML with ampersands, quotes and carets encoded" - if not isinstance(html, basestring): - html = str(html) - return html.replace('&', '&').replace('<', '<').replace('>', '>').replace('"', '"').replace("'", ''') + "Return the given HTML with ampersands, quotes and carets encoded." + return force_unicode(html).replace('&', '&').replace('<', '<').replace('>', '>').replace('"', '"').replace("'", ''') +escape = allow_lazy(escape, unicode) def linebreaks(value): - "Converts newlines into <p> and <br />s" - value = re.sub(r'\r\n|\r|\n', '\n', value) # normalize newlines + "Convert newlines into <p> and <br />s." + value = re.sub(r'\r\n|\r|\n', '\n', force_unicode(value)) # normalize newlines paras = re.split('\n{2,}', value) - paras = ['<p>%s</p>' % p.strip().replace('\n', '<br />') for p in paras] - return '\n\n'.join(paras) + paras = [u'<p>%s</p>' % p.strip().replace('\n', '<br />') for p in paras] + return u'\n\n'.join(paras) +linebreaks = allow_lazy(linebreaks, unicode) def strip_tags(value): - "Returns the given HTML with all tags stripped" - return re.sub(r'<[^>]*?>', '', value) + "Return the given HTML with all tags stripped." + return re.sub(r'<[^>]*?>', '', force_unicode(value)) +strip_tags = allow_lazy(strip_tags) def strip_spaces_between_tags(value): - "Returns the given HTML with spaces between tags removed" - return re.sub(r'>\s+<', '><', value) + "Return the given HTML with spaces between tags removed." + return re.sub(r'>\s+<', '><', force_unicode(value)) +strip_spaces_between_tags = allow_lazy(strip_spaces_between_tags, unicode) def strip_entities(value): - "Returns the given HTML with all entities (&something;) stripped" - return re.sub(r'&(?:\w+|#\d);', '', value) + "Return the given HTML with all entities (&something;) stripped." + return re.sub(r'&(?:\w+|#\d+);', '', force_unicode(value)) +strip_entities = allow_lazy(strip_entities, unicode) def fix_ampersands(value): - "Returns the given HTML with all unencoded ampersands encoded correctly" - return unencoded_ampersands_re.sub('&', value) + "Return the given HTML with all unencoded ampersands encoded correctly." + return unencoded_ampersands_re.sub('&', force_unicode(value)) +fix_ampersands = allow_lazy(fix_ampersands, unicode) def urlize(text, trim_url_limit=None, nofollow=False): """ - Converts any URLs in text into clickable links. Works on http://, https:// - and www. links. Links can have trailing punctuation (periods, commas, - close-parens) and leading punctuation (opening parens) and it'll still do - the right thing. + Convert any URLs in text into clickable links. + + Works on http://, https://, and www. links. Links can have trailing + punctuation (periods, commas, close-parens) and leading punctuation + (opening parens) and it'll still do the right thing. If trim_url_limit is not None, the URLs in link text longer than this limit will truncated to trim_url_limit-3 characters and appended with an elipsis. @@ -65,7 +73,7 @@ def urlize(text, trim_url_limit=None, nofollow=False): attribute. """ trim_url = lambda x, limit=trim_url_limit: limit is not None and (len(x) > limit and ('%s...' % x[:max(0, limit - 3)])) or x - words = word_split_re.split(text) + words = word_split_re.split(force_unicode(text)) nofollow_attr = nofollow and ' rel="nofollow"' or '' for i, word in enumerate(words): match = punctuation_re.match(word) @@ -82,22 +90,23 @@ def urlize(text, trim_url_limit=None, nofollow=False): middle = '<a href="mailto:%s">%s</a>' % (middle, middle) if lead + middle + trail != word: words[i] = lead + middle + trail - return ''.join(words) + return u''.join(words) +urlize = allow_lazy(urlize, unicode) def clean_html(text): """ - Cleans the given HTML. Specifically, it does the following: - * Converts <b> and <i> to <strong> and <em>. - * Encodes all ampersands correctly. - * Removes all "target" attributes from <a> tags. - * Removes extraneous HTML, such as presentational tags that open and + Clean the given HTML. Specifically, do the following: + * Convert <b> and <i> to <strong> and <em>. + * Encode all ampersands correctly. + * Remove all "target" attributes from <a> tags. + * Remove extraneous HTML, such as presentational tags that open and immediately close and <br clear="all">. - * Converts hard-coded bullets into HTML unordered lists. - * Removes stuff like "<p> </p>", but only if it's at the + * Convert hard-coded bullets into HTML unordered lists. + * Remove stuff like "<p> </p>", but only if it's at the bottom of the text. """ from django.utils.text import normalize_newlines - text = normalize_newlines(text) + text = normalize_newlines(force_unicode(text)) text = re.sub(r'<(/?)\s*b\s*>', '<\\1strong>', text) text = re.sub(r'<(/?)\s*i\s*>', '<\\1em>', text) text = fix_ampersands(text) @@ -110,9 +119,10 @@ def clean_html(text): s = match.group().replace('</p>', '</li>') for d in DOTS: s = s.replace('<p>%s' % d, '<li>') - return '<ul>\n%s\n</ul>' % s + return u'<ul>\n%s\n</ul>' % s text = hard_coded_bullets_re.sub(replace_p_tags, text) - # Remove stuff like "<p> </p>", but only if it's at the bottom of the text. + # Remove stuff like "<p> </p>", but only if it's at the bottom + # of the text. text = trailing_empty_content_re.sub('', text) return text - +clean_html = allow_lazy(clean_html, unicode) diff --git a/django/utils/http.py b/django/utils/http.py new file mode 100644 index 0000000000..4c3b6af868 --- /dev/null +++ b/django/utils/http.py @@ -0,0 +1,38 @@ +import urllib +from django.utils.encoding import smart_str, force_unicode +from django.utils.functional import allow_lazy + +def urlquote(url, safe='/'): + """ + A version of Python's urllib.quote() function that can operate on unicode + strings. The url is first UTF-8 encoded before quoting. The returned string + can safely be used as part of an argument to a subsequent iri_to_uri() call + without double-quoting occurring. + """ + return force_unicode(urllib.quote(smart_str(url))) +urlquote = allow_lazy(urlquote, unicode) + +def urlquote_plus(url, safe=''): + """ + A version of Python's urllib.quote_plus() function that can operate on + unicode strings. The url is first UTF-8 encoded before quoting. The + returned string can safely be used as part of an argument to a subsequent + iri_to_uri() call without double-quoting occurring. + """ + return force_unicode(urllib.quote_plus(smart_str(url), safe)) +urlquote_plus = allow_lazy(urlquote_plus, unicode) + +def urlencode(query, doseq=0): + """ + A version of Python's urllib.urlencode() function that can operate on + unicode strings. The parameters are first case to UTF-8 encoded strings and + then encoded as per normal. + """ + if hasattr(query, 'items'): + query = query.items() + return urllib.urlencode( + [(smart_str(k), + isinstance(v, (list,tuple)) and [smart_str(i) for i in v] or smart_str(v)) + for k, v in query], + doseq) + diff --git a/django/utils/maxlength.py b/django/utils/maxlength.py new file mode 100644 index 0000000000..9216fe1c3a --- /dev/null +++ b/django/utils/maxlength.py @@ -0,0 +1,67 @@ +""" +Utilities for providing backwards compatibility for the maxlength argument, +which has been replaced by max_length, see ticket #2101. +""" + +from warnings import warn + +def get_maxlength(self): + return self.max_length + +def set_maxlength(self, value): + self.max_length = value + +def legacy_maxlength(max_length, maxlength): + """ + Consolidates max_length and maxlength, providing backwards compatibilty + for the legacy "maxlength" argument. + If one of max_length or maxlength is given, then that value is returned. + If both are given, a TypeError is raised. + If maxlength is used at all, a deprecation warning is issued. + """ + if maxlength is not None: + warn("maxlength is deprecated, use max_length instead.", + PendingDeprecationWarning, + stacklevel=3) + if max_length is not None: + raise TypeError("field can not take both the max_length" + " argument and the legacy maxlength argument.") + max_length = maxlength + return max_length + +def remove_maxlength(func): + """ + A decorator to be used on a class's __init__ that provides backwards + compatibilty for the legacy "maxlength" keyword argument, i.e. + name = models.CharField(maxlength=20) + It does this by changing the passed "maxlength" keyword argument + (if it exists) into a "max_length" keyword argument. + """ + def inner(self, *args, **kwargs): + max_length = kwargs.get('max_length', None) + # pop maxlength because we don't want this going to __init__. + maxlength = kwargs.pop('maxlength', None) + max_length = legacy_maxlength(max_length, maxlength) + # Only set the max_length keyword argument if we got a value back. + if max_length is not None: + kwargs['max_length'] = max_length + func(self, *args, **kwargs) + return inner + +# This metaclass is used in two places, and should be removed when legacy +# support for maxlength is dropped. +# * oldforms.FormField +# * db.models.fields.Field + +class LegacyMaxlength(type): + """ + Metaclass for providing backwards compatibility support for the + "maxlength" keyword argument. + """ + + def __init__(cls, name, bases, attrs): + super(LegacyMaxlength, cls).__init__(name, bases, attrs) + # Decorate the class's __init__ to remove any maxlength keyword. + cls.__init__ = remove_maxlength(cls.__init__) + # Support accessing and setting to the legacy maxlength attribute. + cls.maxlength = property(get_maxlength, set_maxlength) diff --git a/django/utils/stopwords.py b/django/utils/stopwords.py index dea5660413..18aeb7f5d3 100644 --- a/django/utils/stopwords.py +++ b/django/utils/stopwords.py @@ -38,5 +38,5 @@ def strip_stopwords(sentence): for word in words: if word.lower() not in stopwords: sentence.append(word) - return ' '.join(sentence) + return u' '.join(sentence) diff --git a/django/utils/text.py b/django/utils/text.py index c73ab908f3..4670ab47fa 100644 --- a/django/utils/text.py +++ b/django/utils/text.py @@ -1,15 +1,19 @@ import re - from django.conf import settings +from django.utils.encoding import force_unicode +from django.utils.functional import allow_lazy +from django.utils.translation import ugettext_lazy # Capitalizes the first letter of a string. -capfirst = lambda x: x and x[0].upper() + x[1:] +capfirst = lambda x: x and force_unicode(x)[0].upper() + force_unicode(x)[1:] +capfirst = allow_lazy(capfirst, unicode) def wrap(text, width): """ A word-wrap function that preserves existing line breaks and most spaces in the text. Expects that existing line breaks are posix newlines. """ + text = force_unicode(text) def _generator(): it = iter(text.split(' ')) word = it.next() @@ -29,29 +33,34 @@ def wrap(text, width): if len(lines) > 1: pos = len(lines[-1]) yield word - return "".join(_generator()) + return u''.join(_generator()) +wrap = allow_lazy(wrap, unicode) def truncate_words(s, num): "Truncates a string after a certain number of words." + s = force_unicode(s) length = int(num) words = s.split() if len(words) > length: words = words[:length] if not words[-1].endswith('...'): words.append('...') - return ' '.join(words) + return u' '.join(words) +truncate_words = allow_lazy(truncate_words, unicode) def truncate_html_words(s, num): """ - Truncates html to a certain number of words (not counting tags and comments). - Closes opened tags if they were correctly closed in the given html. + Truncates html to a certain number of words (not counting tags and + comments). Closes opened tags if they were correctly closed in the given + html. """ + s = force_unicode(s) length = int(num) if length <= 0: - return '' + return u'' html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input') # Set up regular expressions - re_words = re.compile(r'&.*?;|<.*?>|([A-Za-z0-9][\w-]*)') + re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U) re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>') # Count non-HTML words and keep note of open tags pos = 0 @@ -100,6 +109,7 @@ def truncate_html_words(s, num): out += '</%s>' % tag # Return string return out +truncate_html_words = allow_lazy(truncate_html_words, unicode) def get_valid_filename(s): """ @@ -110,10 +120,11 @@ def get_valid_filename(s): >>> get_valid_filename("john's portrait in 2004.jpg") 'johns_portrait_in_2004.jpg' """ - s = s.strip().replace(' ', '_') + s = force_unicode(s).strip().replace(' ', '_') return re.sub(r'[^-A-Za-z0-9_.]', '', s) +get_valid_filename = allow_lazy(get_valid_filename, unicode) -def get_text_list(list_, last_word='or'): +def get_text_list(list_, last_word=ugettext_lazy(u'or')): """ >>> get_text_list(['a', 'b', 'c', 'd']) 'a, b, c or d' @@ -126,23 +137,22 @@ def get_text_list(list_, last_word='or'): >>> get_text_list([]) '' """ - if len(list_) == 0: return '' - if len(list_) == 1: return list_[0] - return '%s %s %s' % (', '.join([str(i) for i in list_][:-1]), last_word, list_[-1]) + if len(list_) == 0: return u'' + if len(list_) == 1: return force_unicode(list_[0]) + return u'%s %s %s' % (', '.join([force_unicode(i) for i in list_][:-1]), force_unicode(last_word), force_unicode(list_[-1])) +get_text_list = allow_lazy(get_text_list, unicode) def normalize_newlines(text): - return re.sub(r'\r\n|\r|\n', '\n', text) + return force_unicode(re.sub(r'\r\n|\r|\n', '\n', text)) +normalize_newlines = allow_lazy(normalize_newlines, unicode) def recapitalize(text): "Recapitalizes text, placing caps after end-of-sentence punctuation." -# capwords = () - text = text.lower() + text = force_unicode(text).lower() capsRE = re.compile(r'(?:^|(?<=[\.\?\!] ))([a-z])') text = capsRE.sub(lambda x: x.group(1).upper(), text) -# for capword in capwords: -# capwordRE = re.compile(r'\b%s\b' % capword, re.I) -# text = capwordRE.sub(capword, text) return text +recapitalize = allow_lazy(recapitalize) def phone2numeric(phone): "Converts a phone number with letters into its numeric equivalent." @@ -153,6 +163,7 @@ def phone2numeric(phone): 's': '7', 'r': '7', 'u': '8', 't': '8', 'w': '9', 'v': '8', 'y': '9', 'x': '9'}.get(m.group(0).lower()) return letters.sub(char2number, phone) +phone2numeric = allow_lazy(phone2numeric) # From http://www.xhaus.com/alan/python/httpcomp.html#gzip # Used with permission. @@ -172,7 +183,7 @@ def javascript_quote(s, quote_double_quotes=False): return r"\u%04x" % ord(match.group(1)) if type(s) == str: - s = s.decode(settings.DEFAULT_CHARSET) + s = s.decode('utf-8') elif type(s) != unicode: raise TypeError, s s = s.replace('\\', '\\\\') @@ -183,6 +194,7 @@ def javascript_quote(s, quote_double_quotes=False): if quote_double_quotes: s = s.replace('"', '"') return str(ustring_re.sub(fix, s)) +javascript_quote = allow_lazy(javascript_quote, unicode) smart_split_re = re.compile('("(?:[^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'(?:[^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'|[^\\s]+)') def smart_split(text): @@ -195,6 +207,7 @@ def smart_split(text): >>> list(smart_split('This is "a person\'s" test.')) ['This', 'is', '"a person\'s"', 'test.'] """ + text = force_unicode(text) for bit in smart_split_re.finditer(text): bit = bit.group(0) if bit[0] == '"' and bit[-1] == '"': @@ -203,3 +216,5 @@ def smart_split(text): yield "'" + bit[1:-1].replace("\\'", "'").replace("\\\\", "\\") + "'" else: yield bit +smart_split = allow_lazy(smart_split, unicode) + diff --git a/django/utils/timesince.py b/django/utils/timesince.py index 394f818395..455788e7d7 100644 --- a/django/utils/timesince.py +++ b/django/utils/timesince.py @@ -1,6 +1,6 @@ import datetime, math, time from django.utils.tzinfo import LocalTimezone -from django.utils.translation import ngettext, gettext +from django.utils.translation import ungettext, ugettext def timesince(d, now=None): """ @@ -9,12 +9,12 @@ def timesince(d, now=None): Adapted from http://blog.natbat.co.uk/archive/2003/Jun/14/time_since """ chunks = ( - (60 * 60 * 24 * 365, lambda n: ngettext('year', 'years', n)), - (60 * 60 * 24 * 30, lambda n: ngettext('month', 'months', n)), - (60 * 60 * 24 * 7, lambda n : ngettext('week', 'weeks', n)), - (60 * 60 * 24, lambda n : ngettext('day', 'days', n)), - (60 * 60, lambda n: ngettext('hour', 'hours', n)), - (60, lambda n: ngettext('minute', 'minutes', n)) + (60 * 60 * 24 * 365, lambda n: ungettext('year', 'years', n)), + (60 * 60 * 24 * 30, lambda n: ungettext('month', 'months', n)), + (60 * 60 * 24 * 7, lambda n : ungettext('week', 'weeks', n)), + (60 * 60 * 24, lambda n : ungettext('day', 'days', n)), + (60 * 60, lambda n: ungettext('hour', 'hours', n)), + (60, lambda n: ungettext('minute', 'minutes', n)) ) # Convert datetime.date to datetime.datetime for comparison if d.__class__ is not datetime.datetime: @@ -33,18 +33,16 @@ def timesince(d, now=None): delta = now - (d - datetime.timedelta(0, 0, d.microsecond)) since = delta.days * 24 * 60 * 60 + delta.seconds for i, (seconds, name) in enumerate(chunks): - count = since / seconds + count = since // seconds if count != 0: break - if count < 0: - return gettext('%d milliseconds') % math.floor((now - d).microseconds / 1000) - s = gettext('%(number)d %(type)s') % {'number': count, 'type': name(count)} + s = ugettext('%(number)d %(type)s') % {'number': count, 'type': name(count)} if i + 1 < len(chunks): # Now get the second item seconds2, name2 = chunks[i + 1] - count2 = (since - (seconds * count)) / seconds2 + count2 = (since - (seconds * count)) // seconds2 if count2 != 0: - s += gettext(', %(number)d %(type)s') % {'number': count2, 'type': name2(count2)} + s += ugettext(', %(number)d %(type)s') % {'number': count2, 'type': name2(count2)} return s def timeuntil(d, now=None): diff --git a/django/utils/translation/__init__.py b/django/utils/translation/__init__.py index dbb97af76c..13fc8a847a 100644 --- a/django/utils/translation/__init__.py +++ b/django/utils/translation/__init__.py @@ -7,7 +7,8 @@ __all__ = ['gettext', 'gettext_noop', 'gettext_lazy', 'ngettext', 'ngettext_lazy', 'string_concat', 'activate', 'deactivate', 'get_language', 'get_language_bidi', 'get_date_formats', 'get_partial_date_formats', 'check_for_language', 'to_locale', - 'get_language_from_request', 'install', 'templatize'] + 'get_language_from_request', 'install', 'templatize', 'ugettext', + 'ungettext', 'deactivate_all'] # Here be dragons, so a short explanation of the logic won't hurt: # We are trying to solve two problems: (1) access settings, in particular @@ -48,19 +49,28 @@ del g, delayed_loader def gettext_noop(message): return real_gettext_noop(message) +ugettext_noop = gettext_noop + def gettext(message): return real_gettext(message) - def ngettext(singular, plural, number): return real_ngettext(singular, plural, number) +def ugettext(message): + return real_ugettext(message) + +def ungettext(singular, plural, number): + return real_ungettext(singular, plural, number) + def string_concat(*strings): return real_string_concat(*strings) -ngettext_lazy = lazy(ngettext, str, unicode) -gettext_lazy = lazy(gettext, str, unicode) -string_concat = lazy(string_concat, str, unicode) +ngettext_lazy = lazy(ngettext, str) +gettext_lazy = lazy(gettext, str) +ungettext_lazy = lazy(ungettext, unicode) +ugettext_lazy = lazy(ugettext, unicode) +string_concat = lazy(string_concat, unicode) def activate(language): return real_activate(language) @@ -95,3 +105,6 @@ def install(): def templatize(src): return real_templatize(src) +def deactivate_all(): + return real_deactivate_all() + diff --git a/django/utils/translation/trans_null.py b/django/utils/translation/trans_null.py index 10b07529e3..e3f89567a5 100644 --- a/django/utils/translation/trans_null.py +++ b/django/utils/translation/trans_null.py @@ -3,15 +3,19 @@ # settings.USE_I18N = False can use this module rather than trans_real.py. from django.conf import settings +from django.utils.encoding import force_unicode def ngettext(singular, plural, number): if number == 1: return singular return plural ngettext_lazy = ngettext -string_concat = lambda *strings: ''.join([str(el) for el in strings]) +def ungettext(singular, plural, number): + return force_unicode(ngettext(singular, plural, number)) + +string_concat = lambda *strings: u''.join([force_unicode(el) for el in strings]) activate = lambda x: None -deactivate = install = lambda: None +deactivate = deactivate_all = install = lambda: None get_language = lambda: settings.LANGUAGE_CODE get_language_bidi = lambda: settings.LANGUAGE_CODE in settings.LANGUAGES_BIDI get_date_formats = lambda: (settings.DATE_FORMAT, settings.DATETIME_FORMAT, settings.TIME_FORMAT) @@ -30,6 +34,9 @@ TECHNICAL_ID_MAP = { def gettext(message): return TECHNICAL_ID_MAP.get(message, message) +def ugettext(message): + return force_unicode(gettext(message)) + gettext_noop = gettext_lazy = _ = gettext def to_locale(language): diff --git a/django/utils/translation/trans_real.py b/django/utils/translation/trans_real.py index 293b4ef9cd..765152afce 100644 --- a/django/utils/translation/trans_real.py +++ b/django/utils/translation/trans_real.py @@ -3,6 +3,7 @@ import os, re, sys import gettext as gettext_module from cStringIO import StringIO +from django.utils.encoding import force_unicode try: import threading @@ -57,10 +58,10 @@ class DjangoTranslation(gettext_module.GNUTranslations): # the output charset. Before 2.4, the output charset is # identical with the translation file charset. try: - self.set_output_charset(settings.DEFAULT_CHARSET) + self.set_output_charset('utf-8') except AttributeError: pass - self.django_output_charset = settings.DEFAULT_CHARSET + self.django_output_charset = 'utf-8' self.__language = '??' def merge(self, other): @@ -202,6 +203,14 @@ def deactivate(): if currentThread() in _active: del _active[currentThread()] +def deactivate_all(): + """ + Makes the active translation object a NullTranslations() instance. This is + useful when we want delayed translations to appear as the original string + for some reason. + """ + _active[currentThread()] = gettext_module.NullTranslations() + def get_language(): "Returns the currently selected language." t = _active.get(currentThread(), None) @@ -238,49 +247,72 @@ def catalog(): _default = translation(settings.LANGUAGE_CODE) return _default -def gettext(message): +def do_translate(message, translation_function): """ - This function will be patched into the builtins module to provide the _ - helper function. It will use the current thread as a discriminator to find - the translation object to use. If no current translation is activated, the - message will be run through the default translation object. + Translate 'message' using the given 'translation_function' name -- which + will be either gettext or ugettext. """ global _default, _active t = _active.get(currentThread(), None) if t is not None: - return t.gettext(message) + return getattr(t, translation_function)(message) if _default is None: from django.conf import settings _default = translation(settings.LANGUAGE_CODE) - return _default.gettext(message) + return getattr(_default, translation_function)(message) + +def gettext(message): + """ + This function will be patched into the builtins module to provide the _ + helper function. It will use the current thread as a discriminator to find + the translation object to use. If no current translation is activated, the + message will be run through the default translation object. + """ + return do_translate(message, 'gettext') + +def ugettext(message): + return do_translate(message, 'ugettext') def gettext_noop(message): """ Marks strings for translation but doesn't translate them now. This can be used to store strings in global variables that should stay in the base - language (because they might be used externally) and will be translated later. + language (because they might be used externally) and will be translated + later. """ return message -def ngettext(singular, plural, number): - """ - Returns the translation of either the singular or plural, based on the number. - """ +def do_ntranslate(singular, plural, number, translation_function): global _default, _active t = _active.get(currentThread(), None) if t is not None: - return t.ngettext(singular, plural, number) + return getattr(t, translation_function)(singular, plural, number) if _default is None: from django.conf import settings _default = translation(settings.LANGUAGE_CODE) - return _default.ngettext(singular, plural, number) + return getattr(_default, translation_function)(singular, plural, number) + +def ngettext(singular, plural, number): + """ + Returns a UTF-8 bytestring of the translation of either the singular or + plural, based on the number. + """ + return do_ntranslate(singular, plural, number, 'ngettext') + +def ungettext(singular, plural, number): + """ + Returns a unicode strings of the translation of either the singular or + plural, based on the number. + """ + return do_ntranslate(singular, plural, number, 'ungettext') def check_for_language(lang_code): """ - Checks whether there is a global language file for the given language code. - This is used to decide whether a user-provided language is available. This is - only used for language codes from either the cookies or session. + Checks whether there is a global language file for the given language + code. This is used to decide whether a user-provided language is + available. This is only used for language codes from either the cookies or + session. """ from django.conf import settings globalpath = os.path.join(os.path.dirname(sys.modules[settings.__module__].__file__), 'locale') @@ -291,9 +323,10 @@ def check_for_language(lang_code): def get_language_from_request(request): """ - Analyzes the request to find what language the user wants the system to show. - Only languages listed in settings.LANGUAGES are taken into account. If the user - requests a sublanguage where we have a main language, we send out the main language. + Analyzes the request to find what language the user wants the system to + show. Only languages listed in settings.LANGUAGES are taken into account. + If the user requests a sublanguage where we have a main language, we send + out the main language. """ global _accepted from django.conf import settings @@ -355,9 +388,9 @@ def get_date_formats(): one, the formats provided in the settings will be used. """ from django.conf import settings - date_format = _('DATE_FORMAT') - datetime_format = _('DATETIME_FORMAT') - time_format = _('TIME_FORMAT') + date_format = ugettext('DATE_FORMAT') + datetime_format = ugettext('DATETIME_FORMAT') + time_format = ugettext('TIME_FORMAT') if date_format == 'DATE_FORMAT': date_format = settings.DATE_FORMAT if datetime_format == 'DATETIME_FORMAT': @@ -373,8 +406,8 @@ def get_partial_date_formats(): one, the formats provided in the settings will be used. """ from django.conf import settings - year_month_format = _('YEAR_MONTH_FORMAT') - month_day_format = _('MONTH_DAY_FORMAT') + year_month_format = ugettext('YEAR_MONTH_FORMAT') + month_day_format = ugettext('MONTH_DAY_FORMAT') if year_month_format == 'YEAR_MONTH_FORMAT': year_month_format = settings.YEAR_MONTH_FORMAT if month_day_format == 'MONTH_DAY_FORMAT': @@ -483,9 +516,7 @@ def templatize(src): def string_concat(*strings): """" - lazy variant of string concatenation, needed for translations that are - constructed from multiple parts. Handles lazy strings and non-strings by - first turning all arguments to strings, before joining them. + Lazy variant of string concatenation, needed for translations that are + constructed from multiple parts. """ - return ''.join([str(el) for el in strings]) - + return u''.join([force_unicode(s) for s in strings]) diff --git a/django/utils/tzinfo.py b/django/utils/tzinfo.py index cc9f028e91..e2e1d10fc1 100644 --- a/django/utils/tzinfo.py +++ b/django/utils/tzinfo.py @@ -1,13 +1,17 @@ "Implementation of tzinfo classes for use with datetime.datetime." +import locale import time from datetime import timedelta, tzinfo +from django.utils.encoding import smart_unicode + +DEFAULT_ENCODING = locale.getdefaultlocale()[1] or 'ascii' class FixedOffset(tzinfo): "Fixed offset in minutes east from UTC." def __init__(self, offset): self.__offset = timedelta(minutes=offset) - self.__name = "%+03d%02d" % (offset // 60, offset % 60) + self.__name = u"%+03d%02d" % (offset // 60, offset % 60) def __repr__(self): return self.__name @@ -25,7 +29,7 @@ class LocalTimezone(tzinfo): "Proxy timezone information from time module." def __init__(self, dt): tzinfo.__init__(self, dt) - self._tzname = time.tzname[self._isdst(dt)] + self._tzname = self.tzname(dt) def __repr__(self): return self._tzname @@ -43,7 +47,10 @@ class LocalTimezone(tzinfo): return timedelta(0) def tzname(self, dt): - return time.tzname[self._isdst(dt)] + try: + return smart_unicode(time.tzname[self._isdst(dt)], DEFAULT_ENCODING) + except UnicodeDecodeError: + return None def _isdst(self, dt): tt = (dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, dt.weekday(), 0, -1) diff --git a/django/utils/version.py b/django/utils/version.py new file mode 100644 index 0000000000..cf8085653f --- /dev/null +++ b/django/utils/version.py @@ -0,0 +1,39 @@ +import django +import os.path +import re + +def get_svn_revision(path=None): + """ + Returns the SVN revision in the form SVN-XXXX, + where XXXX is the revision number. + + Returns SVN-unknown if anything goes wrong, such as an unexpected + format of internal SVN files. + + If path is provided, it should be a directory whose SVN info you want to + inspect. If it's not provided, this will use the root django/ package + directory. + """ + rev = None + if path is None: + path = django.__path__[0] + entries_path = '%s/.svn/entries' % path + + if os.path.exists(entries_path): + entries = open(entries_path, 'r').read() + # Versions >= 7 of the entries file are flat text. The first line is + # the version number. The next set of digits after 'dir' is the revision. + if re.match('(\d+)', entries): + rev_match = re.search('\d+\s+dir\s+(\d+)', entries) + if rev_match: + rev = rev_match.groups()[0] + # Older XML versions of the file specify revision as an attribute of + # the first entries node. + else: + from xml.dom import minidom + dom = minidom.parse(entries_path) + rev = dom.getElementsByTagName('entry')[0].getAttribute('revision') + + if rev: + return u'SVN-%s' % rev + return u'SVN-unknown' |
