diff options
| author | Adrian Holovaty <adrian@holovaty.com> | 2005-07-13 01:25:57 +0000 |
|---|---|---|
| committer | Adrian Holovaty <adrian@holovaty.com> | 2005-07-13 01:25:57 +0000 |
| commit | ed114e15106192b22ebb78ef5bf5bce72b419d13 (patch) | |
| tree | f7c27f035cca8d50bd69e2ecbd7497fccec4a35a /django/utils | |
| parent | 07ffc7d605cc96557db28a9e35da69bc0719611b (diff) | |
Imported Django from private SVN repository (created from r. 8825)
git-svn-id: http://code.djangoproject.com/svn/django/trunk@3 bcc190cf-cafb-0310-a4f2-bffc1f526a37
Diffstat (limited to 'django/utils')
| -rw-r--r-- | django/utils/__init__.py | 0 | ||||
| -rw-r--r-- | django/utils/datastructures.py | 171 | ||||
| -rw-r--r-- | django/utils/dateformat.py | 317 | ||||
| -rw-r--r-- | django/utils/dates.py | 27 | ||||
| -rw-r--r-- | django/utils/feedgenerator.py | 152 | ||||
| -rw-r--r-- | django/utils/html.py | 110 | ||||
| -rw-r--r-- | django/utils/httpwrappers.py | 319 | ||||
| -rw-r--r-- | django/utils/images.py | 22 | ||||
| -rw-r--r-- | django/utils/stopwords.py | 42 | ||||
| -rw-r--r-- | django/utils/text.py | 108 | ||||
| -rw-r--r-- | django/utils/timesince.py | 46 | ||||
| -rw-r--r-- | django/utils/xmlutils.py | 13 |
12 files changed, 1327 insertions, 0 deletions
diff --git a/django/utils/__init__.py b/django/utils/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/django/utils/__init__.py diff --git a/django/utils/datastructures.py b/django/utils/datastructures.py new file mode 100644 index 0000000000..c1cb6193f5 --- /dev/null +++ b/django/utils/datastructures.py @@ -0,0 +1,171 @@ +class MergeDict: + """ + A simple class for creating new "virtual" dictionaries that actualy look + up values in more than one dictionary, passed in the constructor. + """ + def __init__(self, *dicts): + self.dicts = dicts + + def __getitem__(self, key): + for dict in self.dicts: + try: + return dict[key] + except KeyError: + pass + raise KeyError + + def get(self, key, default): + try: + return self[key] + except KeyError: + return default + + def getlist(self, key): + for dict in self.dicts: + try: + return dict.getlist(key) + except KeyError: + pass + raise KeyError + + def items(self): + item_list = [] + for dict in self.dicts: + item_list.extend(dict.items()) + return item_list + + def has_key(self, key): + for dict in self.dicts: + if dict.has_key(key): + return True + return False + +class MultiValueDictKeyError(KeyError): + pass + +class MultiValueDict: + """ + A dictionary-like class customized to deal with multiple values for the same key. + + >>> d = MultiValueDict({'name': ['Adrian', 'Simon'], 'position': ['Developer']}) + >>> d['name'] + 'Simon' + >>> d.getlist('name') + ['Adrian', 'Simon'] + >>> d.get('lastname', 'nonexistent') + 'nonexistent' + >>> d.setlist('lastname', ['Holovaty', 'Willison']) + + This class exists to solve the irritating problem raised by cgi.parse_qs, + which returns a list for every key, even though most Web forms submit + single name-value pairs. 
+ """ + def __init__(self, key_to_list_mapping=None): + self.data = key_to_list_mapping or {} + + def __repr__(self): + return repr(self.data) + + def __getitem__(self, key): + "Returns the data value for this key; raises KeyError if not found" + if self.data.has_key(key): + try: + return self.data[key][-1] # in case of duplicates, use last value ([-1]) + except IndexError: + return [] + raise MultiValueDictKeyError, "Key '%s' not found in MultiValueDict %s" % (key, self.data) + + def __setitem__(self, key, value): + self.data[key] = [value] + + def __len__(self): + return len(self.data) + + def get(self, key, default): + "Returns the default value if the requested data doesn't exist" + try: + val = self[key] + except (KeyError, IndexError): + return default + if val == []: + return default + return val + + def getlist(self, key): + "Returns an empty list if the requested data doesn't exist" + try: + return self.data[key] + except KeyError: + return [] + + def setlist(self, key, list_): + self.data[key] = list_ + + def appendlist(self, key, item): + "Appends an item to the internal list associated with key" + try: + self.data[key].append(item) + except KeyError: + self.data[key] = [item] + + def has_key(self, key): + return self.data.has_key(key) + + def items(self): + # we don't just return self.data.items() here, because we want to use + # self.__getitem__() to access the values as *strings*, not lists + return [(key, self[key]) for key in self.data.keys()] + + def keys(self): + return self.data.keys() + + def update(self, other_dict): + if isinstance(other_dict, MultiValueDict): + for key, value_list in other_dict.data.items(): + self.data.setdefault(key, []).extend(value_list) + elif type(other_dict) == type({}): + for key, value in other_dict.items(): + self.data.setdefault(key, []).append(value) + else: + raise ValueError, "MultiValueDict.update() takes either a MultiValueDict or dictionary" + + def copy(self): + "Returns a copy of this object" + import copy 
+ cp = copy.deepcopy(self) + return cp + +class DotExpandedDict(dict): + """ + A special dictionary constructor that takes a dictionary in which the keys + may contain dots to specify inner dictionaries. It's confusing, but this + example should make sense. + + >>> d = DotExpandedDict({'person.1.firstname': ['Simon'], + 'person.1.lastname': ['Willison'], + 'person.2.firstname': ['Adrian'], + 'person.2.lastname': ['Holovaty']}) + >>> d + {'person': {'1': {'lastname': ['Willison'], 'firstname': ['Simon']}, + '2': {'lastname': ['Holovaty'], 'firstname': ['Adrian']}}} + >>> d['person'] + {'1': {'firstname': ['Simon'], 'lastname': ['Willison'], + '2': {'firstname': ['Adrian'], 'lastname': ['Holovaty']} + >>> d['person']['1'] + {'firstname': ['Simon'], 'lastname': ['Willison']} + + # Gotcha: Results are unpredictable if the dots are "uneven": + >>> DotExpandedDict({'c.1': 2, 'c.2': 3, 'c': 1}) + >>> {'c': 1} + """ + def __init__(self, key_to_list_mapping): + for k, v in key_to_list_mapping.items(): + current = self + bits = k.split('.') + for bit in bits[:-1]: + current = current.setdefault(bit, {}) + # Now assign value to current position + try: + current[bits[-1]] = v + except TypeError: # Special-case if current isn't a dict. + current = {bits[-1]: v} diff --git a/django/utils/dateformat.py b/django/utils/dateformat.py new file mode 100644 index 0000000000..9913f01a57 --- /dev/null +++ b/django/utils/dateformat.py @@ -0,0 +1,317 @@ +""" +PHP date() style date formatting +See http://www.php.net/date for format strings + +Usage: +>>> import datetime +>>> d = datetime.datetime.now() +>>> df = DateFormat(d) +>>> print df.format('jS F Y H:i') +7th October 2003 11:39 +>>> +""" + +from calendar import isleap +from dates import MONTHS, MONTHS_AP, WEEKDAYS + +class DateFormat: + year_days = [None, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334] + + def __init__(self, d): + self.date = d + + def a(self): + "'a.m.' or 'p.m.'" + if self.date.hour > 11: + return 'p.m.' 
+ return 'a.m.' + + def A(self): + "'AM' or 'PM'" + if self.date.hour > 11: + return 'PM' + return 'AM' + + def B(self): + "Swatch Internet time" + raise NotImplementedError + + def d(self): + "Day of the month, 2 digits with leading zeros; i.e. '01' to '31'" + return '%02d' % self.date.day + + def D(self): + "Day of the week, textual, 3 letters; e.g. 'Fri'" + return WEEKDAYS[self.date.weekday()][0:3] + + def f(self): + """ + Time, in 12-hour hours and minutes, with minutes left off if they're zero. + Examples: '1', '1:30', '2:05', '2' + Proprietary extension. + """ + if self.date.minute == 0: + return self.g() + return '%s:%s' % (self.g(), self.i()) + + def F(self): + "Month, textual, long; e.g. 'January'" + return MONTHS[self.date.month] + + def g(self): + "Hour, 12-hour format without leading zeros; i.e. '1' to '12'" + if self.date.hour == 0: + return 12 + if self.date.hour > 12: + return self.date.hour - 12 + return self.date.hour + + def G(self): + "Hour, 24-hour format without leading zeros; i.e. '0' to '23'" + return self.date.hour + + def h(self): + "Hour, 12-hour format; i.e. '01' to '12'" + return '%02d' % self.g() + + def H(self): + "Hour, 24-hour format; i.e. '00' to '23'" + return '%02d' % self.G() + + def i(self): + "Minutes; i.e. '00' to '59'" + return '%02d' % self.date.minute + + def I(self): + "'1' if Daylight Savings Time, '0' otherwise." + raise NotImplementedError + + def j(self): + "Day of the month without leading zeros; i.e. '1' to '31'" + return self.date.day + + def l(self): + "Day of the week, textual, long; e.g. 'Friday'" + return WEEKDAYS[self.date.weekday()] + + def L(self): + "Boolean for whether it is a leap year; i.e. True or False" + return isleap(self.date.year) + + def m(self): + "Month; i.e. '01' to '12'" + return '%02d' % self.date.month + + def M(self): + "Month, textual, 3 letters; e.g. 'Jan'" + return MONTHS[self.date.month][0:3] + + def n(self): + "Month without leading zeros; i.e. 
'1' to '12'" + return self.date.month + + def N(self): + "Month abbreviation in Associated Press style. Proprietary extension." + return MONTHS_AP[self.date.month] + + def O(self): + "Difference to Greenwich time in hours; e.g. '+0200'" + raise NotImplementedError + + def P(self): + """ + Time, in 12-hour hours, minutes and 'a.m.'/'p.m.', with minutes left off + if they're zero and the strings 'midnight' and 'noon' if appropriate. + Examples: '1 a.m.', '1:30 p.m.', 'midnight', 'noon', '12:30 p.m.' + Proprietary extension. + """ + if self.date.minute == 0 and self.date.hour == 0: + return 'midnight' + if self.date.minute == 0 and self.date.hour == 12: + return 'noon' + return '%s %s' % (self.f(), self.a()) + + def r(self): + "RFC 822 formatted date; e.g. 'Thu, 21 Dec 2000 16:01:07 +0200'" + raise NotImplementedError + + def s(self): + "Seconds; i.e. '00' to '59'" + return '%02d' % self.date.second + + def S(self): + "English ordinal suffix for the day of the month, 2 characters; i.e. 'st', 'nd', 'rd' or 'th'" + if self.date.day in (11, 12, 13): # Special case + return 'th' + last = self.date.day % 10 + if last == 1: + return 'st' + if last == 2: + return 'nd' + if last == 3: + return 'rd' + return 'th' + + def t(self): + "Number of days in the given month; i.e. '28' to '31'" + raise NotImplementedError + + def T(self): + "Time zone of this machine; e.g. 'EST' or 'MDT'" + raise NotImplementedError + + def U(self): + "Seconds since the Unix epoch (January 1 1970 00:00:00 GMT)" + raise NotImplementedError + + def w(self): + "Day of the week, numeric, i.e. 
'0' (Sunday) to '6' (Saturday)" + weekday = self.date.weekday() + if weekday == 0: + return 6 + return weekday - 1 + + def W(self): + "ISO-8601 week number of year, weeks starting on Monday" + # Algorithm from http://www.personal.ecu.edu/mccartyr/ISOwdALG.txt + week_number = None + jan1_weekday = self.date.replace(month=1, day=1).weekday() + 1 + weekday = self.date.weekday() + 1 + day_of_year = self.z() + if day_of_year <= (8 - jan1_weekday) and jan1_weekday > 4: + if jan1_weekday == 5 or (jan1_weekday == 6 and isleap(self.date.year-1)): + week_number = 53 + else: + week_number = 52 + else: + if isleap(self.date.year): + i = 366 + else: + i = 365 + if (i - day_of_year) < (4 - weekday): + week_number = 1 + else: + j = day_of_year + (7 - weekday) + (jan1_weekday - 1) + week_number = j / 7 + if jan1_weekday > 4: + week_number -= 1 + return week_number + + def Y(self): + "Year, 4 digits; e.g. '1999'" + return self.date.year + + def y(self): + "Year, 2 digits; e.g. '99'" + return str(self.date.year)[2:] + + def z(self): + "Day of the year; i.e. '0' to '365'" + doy = self.year_days[self.date.month] + self.date.day + if self.L() and self.date.month > 2: + doy += 1 + return doy + + def Z(self): + """Time zone offset in seconds (i.e. '-43200' to '43200'). The offset + for timezones west of UTC is always negative, and for those east of UTC + is always positive.""" + raise NotImplementedError + + def format(self, formatstr): + result = '' + for char in formatstr: + try: + result += str(getattr(self, char)()) + except AttributeError: + result += char + return result + +class TimeFormat: + def __init__(self, t): + self.time = t + + def a(self): + "'a.m.' or 'p.m.'" + if self.time.hour > 11: + return 'p.m.' + else: + return 'a.m.' + + def A(self): + "'AM' or 'PM'" + return self.a().upper() + + def B(self): + "Swatch Internet time" + raise NotImplementedError + + def f(self): + """ + Time, in 12-hour hours and minutes, with minutes left off if they're zero. 
+ Examples: '1', '1:30', '2:05', '2' + Proprietary extension. + """ + if self.time.minute == 0: + return self.g() + return '%s:%s' % (self.g(), self.i()) + + def g(self): + "Hour, 12-hour format without leading zeros; i.e. '1' to '12'" + if self.time.hour == 0: + return 12 + if self.time.hour > 12: + return self.time.hour - 12 + return self.time.hour + + def G(self): + "Hour, 24-hour format without leading zeros; i.e. '0' to '23'" + return self.time.hour + + def h(self): + "Hour, 12-hour format; i.e. '01' to '12'" + return '%02d' % self.g() + + def H(self): + "Hour, 24-hour format; i.e. '00' to '23'" + return '%02d' % self.G() + + def i(self): + "Minutes; i.e. '00' to '59'" + return '%02d' % self.time.minute + + def P(self): + """ + Time, in 12-hour hours, minutes and 'a.m.'/'p.m.', with minutes left off + if they're zero and the strings 'midnight' and 'noon' if appropriate. + Examples: '1 a.m.', '1:30 p.m.', 'midnight', 'noon', '12:30 p.m.' + Proprietary extension. + """ + if self.time.minute == 0 and self.time.hour == 0: + return 'midnight' + if self.time.minute == 0 and self.time.hour == 12: + return 'noon' + return '%s %s' % (self.f(), self.a()) + + def s(self, s): + "Seconds; i.e. 
'00' to '59'" + return '%02d' % self.time.second + + def format(self, formatstr): + result = '' + for char in formatstr: + try: + result += str(getattr(self, char)()) + except AttributeError: + result += char + return result + +def format(value, format_string): + "Convenience function" + df = DateFormat(value) + return df.format(format_string) + +def time_format(value, format_string): + "Convenience function" + tf = TimeFormat(value) + return tf.format(format_string) diff --git a/django/utils/dates.py b/django/utils/dates.py new file mode 100644 index 0000000000..2ae0cc1a6e --- /dev/null +++ b/django/utils/dates.py @@ -0,0 +1,27 @@ +"Commonly-used date structures" + +WEEKDAYS = { + 0:'Monday', 1:'Tuesday', 2:'Wednesday', 3:'Thursday', 4:'Friday', + 5:'Saturday', 6:'Sunday' +} +WEEKDAYS_REV = { + 'monday':0, 'tuesday':1, 'wednesday':2, 'thursday':3, 'friday':4, + 'saturday':5, 'sunday':6 +} +MONTHS = { + 1:'January', 2:'February', 3:'March', 4:'April', 5:'May', 6:'June', + 7:'July', 8:'August', 9:'September', 10:'October', 11:'November', + 12:'December' +} +MONTHS_3 = { + 1:'jan', 2:'feb', 3:'mar', 4:'apr', 5:'may', 6:'jun', 7:'jul', 8:'aug', + 9:'sep', 10:'oct', 11:'nov', 12:'dec' +} +MONTHS_3_REV = { + 'jan':1, 'feb':2, 'mar':3, 'apr':4, 'may':5, 'jun':6, 'jul':7, 'aug':8, + 'sep':9, 'oct':10, 'nov':11, 'dec':12 +} +MONTHS_AP = { # month names in Associated Press style + 1:'Jan.', 2:'Feb.', 3:'March', 4:'April', 5:'May', 6:'June', 7:'July', + 8:'Aug.', 9:'Sept.', 10:'Oct.', 11:'Nov.', 12:'Dec.' +} diff --git a/django/utils/feedgenerator.py b/django/utils/feedgenerator.py new file mode 100644 index 0000000000..dc5dd31fe4 --- /dev/null +++ b/django/utils/feedgenerator.py @@ -0,0 +1,152 @@ +""" +Syndication feed generation library -- used for generating RSS, etc. + +By Adrian Holovaty +Released under the Python license + +Sample usage: + +>>> feed = feedgenerator.Rss201rev2Feed( +... title=u"Poynter E-Media Tidbits", +... 
link=u"http://www.poynter.org/column.asp?id=31", +... description=u"A group weblog by the sharpest minds in online media/journalism/publishing.", +... language=u"en", +... ) +>>> feed.add_item(title="Hello", link=u"http://www.holovaty.com/test/", description="Testing.") +>>> fp = open('test.rss', 'w') +>>> feed.write(fp, 'utf-8') +>>> fp.close() + +For definitions of the different versions of RSS, see: +http://diveintomark.org/archives/2004/02/04/incompatible-rss +""" + +from django.utils.xmlutils import SimplerXMLGenerator + +class SyndicationFeed: + "Base class for all syndication feeds. Subclasses should provide write()" + def __init__(self, title, link, description, language=None): + self.feed_info = { + 'title': title, + 'link': link, + 'description': description, + 'language': language, + } + self.items = [] + + def add_item(self, title, link, description, author_email=None, + author_name=None, pubdate=None, comments=None, unique_id=None, + enclosure=None): + """ + Adds an item to the feed. All args are expected to be Python Unicode + objects except pubdate, which is a datetime.datetime object, and + enclosure, which is an instance of the Enclosure class. + """ + self.items.append({ + 'title': title, + 'link': link, + 'description': description, + 'author_email': author_email, + 'author_name': author_name, + 'pubdate': pubdate, + 'comments': comments, + 'unique_id': unique_id, + 'enclosure': enclosure, + }) + + def num_items(self): + return len(self.items) + + def write(self, outfile, encoding): + """ + Outputs the feed in the given encoding to outfile, which is a file-like + object. Subclasses should override this. + """ + raise NotImplementedError + + def writeString(self, encoding): + """ + Returns the feed in the given encoding as a string. 
+ """ + from StringIO import StringIO + s = StringIO() + self.write(s, encoding) + return s.getvalue() + +class Enclosure: + "Represents an RSS enclosure" + def __init__(self, url, length, mime_type): + "All args are expected to be Python Unicode objects" + self.url, self.length, self.mime_type = url, length, mime_type + +class RssFeed(SyndicationFeed): + def write(self, outfile, encoding): + handler = SimplerXMLGenerator(outfile, encoding) + handler.startDocument() + self.writeRssElement(handler) + self.writeChannelElement(handler) + for item in self.items: + self.writeRssItem(handler, item) + self.endChannelElement(handler) + self.endRssElement(handler) + + def writeRssElement(self, handler): + "Adds the <rss> element to handler, taking care of versioning, etc." + raise NotImplementedError + + def endRssElement(self, handler): + "Ends the <rss> element." + handler.endElement(u"rss") + + def writeChannelElement(self, handler): + handler.startElement(u"channel", {}) + handler.addQuickElement(u"title", self.feed_info['title'], {}) + handler.addQuickElement(u"link", self.feed_info['link'], {}) + handler.addQuickElement(u"description", self.feed_info['description'], {}) + if self.feed_info['language'] is not None: + handler.addQuickElement(u"language", self.feed_info['language'], {}) + + def endChannelElement(self, handler): + handler.endElement(u"channel") + +class RssUserland091Feed(RssFeed): + def startRssElement(self, handler): + handler.startElement(u"rss", {u"version": u"0.91"}) + + def writeRssItem(self, handler, item): + handler.startElement(u"item", {}) + handler.addQuickElement(u"title", item['title'], {}) + handler.addQuickElement(u"link", item['link'], {}) + if item['description'] is not None: + handler.addQuickElement(u"description", item['description'], {}) + handler.endElement(u"item") + +class Rss201rev2Feed(RssFeed): + # Spec: http://blogs.law.harvard.edu/tech/rss + def writeRssElement(self, handler): + handler.startElement(u"rss", {u"version": 
u"2.0"}) + + def writeRssItem(self, handler, item): + handler.startElement(u"item", {}) + handler.addQuickElement(u"title", item['title'], {}) + handler.addQuickElement(u"link", item['link'], {}) + if item['description'] is not None: + handler.addQuickElement(u"description", item['description'], {}) + if item['author_email'] is not None and item['author_name'] is not None: + handler.addQuickElement(u"author", u"%s (%s)" % \ + (item['author_email'], item['author_name']), {}) + if item['pubdate'] is not None: + handler.addQuickElement(u"pubDate", item['pubdate'].strftime('%a, %d %b %Y %H:%M:%S %Z'), {}) + if item['comments'] is not None: + handler.addQuickElement(u"comments", item['comments'], {}) + if item['unique_id'] is not None: + handler.addQuickElement(u"guid", item['unique_id'], {}) + if item['enclosure'] is not None: + handler.addQuickElement(u"enclosure", '', + {u"url": item['enclosure'].url, u"length": item['enclosure'].length, + u"type": item['enclosure'].mime_type}) + handler.endElement(u"item") + +# This isolates the decision of what the system default is, so calling code can +# do "feedgenerator.DefaultRssFeed" instead of "feedgenerator.Rss201rev2Feed". +DefaultRssFeed = Rss201rev2Feed diff --git a/django/utils/html.py b/django/utils/html.py new file mode 100644 index 0000000000..13ee6e742a --- /dev/null +++ b/django/utils/html.py @@ -0,0 +1,110 @@ +"Useful HTML utilities suitable for global use by World Online projects." 
+ +import re, string + +# Configuration for urlize() function +LEADING_PUNCTUATION = ['(', '<', '<'] +TRAILING_PUNCTUATION = ['.', ',', ')', '>', '\n', '>'] + +# list of possible strings used for bullets in bulleted lists +DOTS = ['·', '*', '\xe2\x80\xa2', '•', '•', '•'] + +UNENCODED_AMPERSANDS_RE = re.compile(r'&(?!(\w+|#\d+);)') +WORD_SPLIT_RE = re.compile(r'(\s+)') +PUNCTUATION_RE = re.compile('^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % \ + ('|'.join([re.escape(p) for p in LEADING_PUNCTUATION]), + '|'.join([re.escape(p) for p in TRAILING_PUNCTUATION]))) +SIMPLE_EMAIL_RE = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$') +LINK_TARGET_ATTRIBUTE = re.compile(r'(<a [^>]*?)target=[^\s>]+') +HTML_GUNK = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE) +HARD_CODED_BULLETS = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(d) for d in DOTS]), re.DOTALL) +TRAILING_EMPTY_CONTENT = re.compile(r'(?:<p>(?: |\s|<br \/>)*?</p>\s*)+\Z') + +def escape(html): + "Returns the given HTML with ampersands, quotes and carets encoded" + if not isinstance(html, basestring): + html = str(html) + return html.replace('&', '&').replace('<', '<').replace('>', '>').replace('"', '"') + +def linebreaks(value): + "Converts newlines into <p> and <br />s" + value = re.sub(r'\r\n|\r|\n', '\n', value) # normalize newlines + paras = re.split('\n{2,}', value) + paras = ['<p>%s</p>' % p.strip().replace('\n', '<br />') for p in paras] + return '\n\n'.join(paras) + +def strip_tags(value): + "Returns the given HTML with all tags stripped" + return re.sub(r'<[^>]*?>', '', value) + +def strip_entities(value): + "Returns the given HTML with all entities (&something;) stripped" + return re.sub(r'&(?:\w+|#\d);', '', value) + +def fix_ampersands(value): + "Returns the given HTML with all unencoded ampersands encoded correctly" + return UNENCODED_AMPERSANDS_RE.sub('&', value) + +def 
urlize(text, trim_url_limit=None, nofollow=False): + """ + Converts any URLs in text into clickable links. Works on http://, https:// and + www. links. Links can have trailing punctuation (periods, commas, close-parens) + and leading punctuation (opening parens) and it'll still do the right thing. + + If trim_url_limit is not None, the URLs in link text will be limited to + trim_url_limit characters. + + If nofollow is True, the URLs in link text will get a rel="nofollow" attribute. + """ + trim_url = lambda x, limit=trim_url_limit: limit is not None and (x[:limit] + (len(x) >=limit and '...' or '')) or x + words = WORD_SPLIT_RE.split(text) + nofollow_attr = nofollow and ' rel="nofollow"' or '' + for i, word in enumerate(words): + match = PUNCTUATION_RE.match(word) + if match: + lead, middle, trail = match.groups() + if middle.startswith('www.') or ('@' not in middle and not middle.startswith('http://') and \ + len(middle) > 0 and middle[0] in string.letters + string.digits and \ + (middle.endswith('.org') or middle.endswith('.net') or middle.endswith('.com'))): + middle = '<a href="http://%s"%s>%s</a>' % (middle, nofollow_attr, trim_url(middle)) + if middle.startswith('http://') or middle.startswith('https://'): + middle = '<a href="%s"%s>%s</a>' % (middle, nofollow_attr, trim_url(middle)) + if '@' in middle and not middle.startswith('www.') and not ':' in middle \ + and SIMPLE_EMAIL_RE.match(middle): + middle = '<a href="mailto:%s">%s</a>' % (middle, middle) + if lead + middle + trail != word: + words[i] = lead + middle + trail + return ''.join(words) + +def clean_html(text): + """ + Cleans the given HTML. Specifically, it does the following: + * Converts <b> and <i> to <strong> and <em>. + * Encodes all ampersands correctly. + * Removes all "target" attributes from <a> tags. + * Removes extraneous HTML, such as presentational tags that open and + immediately close and <br clear="all">. + * Converts hard-coded bullets into HTML unordered lists. 
+ * Removes stuff like "<p> </p>", but only if it's at the + bottom of the text. + """ + from django.utils.text import normalize_newlines + text = normalize_newlines(text) + text = re.sub(r'<(/?)\s*b\s*>', '<\\1strong>', text) + text = re.sub(r'<(/?)\s*i\s*>', '<\\1em>', text) + text = fix_ampersands(text) + # Remove all target="" attributes from <a> tags. + text = LINK_TARGET_ATTRIBUTE.sub('\\1', text) + # Trim stupid HTML such as <br clear="all">. + text = HTML_GUNK.sub('', text) + # Convert hard-coded bullets into HTML unordered lists. + def replace_p_tags(match): + s = match.group().replace('</p>', '</li>') + for d in DOTS: + s = s.replace('<p>%s' % d, '<li>') + return '<ul>\n%s\n</ul>' % s + text = HARD_CODED_BULLETS.sub(replace_p_tags, text) + # Remove stuff like "<p> </p>", but only if it's at the bottom of the text. + text = TRAILING_EMPTY_CONTENT.sub('', text) + return text + diff --git a/django/utils/httpwrappers.py b/django/utils/httpwrappers.py new file mode 100644 index 0000000000..513a5bc0d7 --- /dev/null +++ b/django/utils/httpwrappers.py @@ -0,0 +1,319 @@ +from Cookie import SimpleCookie +from pprint import pformat +import datastructures + +DEFAULT_MIME_TYPE = 'text/html' + +class HttpRequest(object): # needs to be new-style class because subclasses define "property"s + "A basic HTTP request" + def __init__(self): + self.GET, self.POST, self.COOKIES, self.META, self.FILES = {}, {}, {}, {}, {} + self.path = '' + + def __repr__(self): + return '<HttpRequest\nGET:%s,\nPOST:%s,\nCOOKIES:%s,\nMETA:%s>' % \ + (pformat(self.GET), pformat(self.POST), pformat(self.COOKIES), + pformat(self.META)) + + def __getitem__(self, key): + for d in (self.POST, self.GET): + if d.has_key(key): + return d[key] + raise KeyError, "%s not found in either POST or GET" % key + + def get_full_path(self): + return '' + +class ModPythonRequest(HttpRequest): + def __init__(self, req): + self._req = req + self.path = req.uri + + def __repr__(self): + return 
'<ModPythonRequest\nGET:%s,\nPOST:%s,\nCOOKIES:%s,\nMETA:%s>' % \ + (pformat(self.GET), pformat(self.POST), pformat(self.COOKIES), + pformat(self.META)) + + def get_full_path(self): + return '%s%s' % (self.path, self._req.args and ('?' + self._req.args) or '') + + def _load_post_and_files(self): + "Populates self._post and self._files" + if self._req.headers_in.has_key('content-type') and self._req.headers_in['content-type'].startswith('multipart'): + self._post, self._files = parse_file_upload(self._req) + else: + self._post, self._files = QueryDict(self._req.read()), datastructures.MultiValueDict() + + def _get_request(self): + if not hasattr(self, '_request'): + self._request = datastructures.MergeDict(self.POST, self.GET) + return self._request + + def _get_get(self): + if not hasattr(self, '_get'): + self._get = QueryDict(self._req.args) + return self._get + + def _set_get(self, get): + self._get = get + + def _get_post(self): + if not hasattr(self, '_post'): + self._load_post_and_files() + return self._post + + def _set_post(self, post): + self._post = post + + def _get_cookies(self): + if not hasattr(self, '_cookies'): + self._cookies = parse_cookie(self._req.headers_in.get('cookie', '')) + return self._cookies + + def _set_cookies(self, cookies): + self._cookies = cookies + + def _get_files(self): + if not hasattr(self, '_files'): + self._load_post_and_files() + return self._files + + def _get_meta(self): + "Lazy loader that returns self.META dictionary" + if not hasattr(self, '_meta'): + self._meta = { + 'AUTH_TYPE': self._req.ap_auth_type, + 'CONTENT_LENGTH': self._req.clength, # This may be wrong + 'CONTENT_TYPE': self._req.content_type, # This may be wrong + 'GATEWAY_INTERFACE': 'CGI/1.1', + 'PATH_INFO': self._req.path_info, + 'PATH_TRANSLATED': None, # Not supported + 'QUERY_STRING': self._req.args, + 'REMOTE_ADDR': self._req.connection.remote_ip, + 'REMOTE_HOST': None, # DNS lookups not supported + 'REMOTE_IDENT': self._req.connection.remote_logname, 
+ 'REMOTE_USER': self._req.user, + 'REQUEST_METHOD': self._req.method, + 'SCRIPT_NAME': None, # Not supported + 'SERVER_NAME': self._req.server.server_hostname, + 'SERVER_PORT': self._req.server.port, + 'SERVER_PROTOCOL': self._req.protocol, + 'SERVER_SOFTWARE': 'mod_python' + } + for key, value in self._req.headers_in.items(): + key = 'HTTP_' + key.upper().replace('-', '_') + self._meta[key] = value + return self._meta + + GET = property(_get_get, _set_get) + POST = property(_get_post, _set_post) + COOKIES = property(_get_cookies, _set_cookies) + FILES = property(_get_files) + META = property(_get_meta) + REQUEST = property(_get_request) + +def parse_file_upload(req): + "Returns a tuple of (POST MultiValueDict, FILES MultiValueDict), given a mod_python req object" + import email, email.Message + from cgi import parse_header + raw_message = '\r\n'.join(['%s:%s' % pair for pair in req.headers_in.items()]) + raw_message += '\r\n\r\n' + req.read() + msg = email.message_from_string(raw_message) + POST = datastructures.MultiValueDict() + FILES = datastructures.MultiValueDict() + for submessage in msg.get_payload(): + if isinstance(submessage, email.Message.Message): + name_dict = parse_header(submessage['Content-Disposition'])[1] + # name_dict is something like {'name': 'file', 'filename': 'test.txt'} for file uploads + # or {'name': 'blah'} for POST fields + # We assume all uploaded files have a 'filename' set. + if name_dict.has_key('filename'): + assert type([]) != type(submessage.get_payload()), "Nested MIME messages are not supported" + if not name_dict['filename'].strip(): + continue + # IE submits the full path, so trim everything but the basename. + # (We can't use os.path.basename because it expects Linux paths.) 
+ filename = name_dict['filename'][name_dict['filename'].rfind("\\")+1:] + FILES.appendlist(name_dict['name'], { + 'filename': filename, + 'content-type': (submessage.has_key('Content-Type') and submessage['Content-Type'] or None), + 'content': submessage.get_payload(), + }) + else: + POST.appendlist(name_dict['name'], submessage.get_payload()) + return POST, FILES + +class QueryDict(datastructures.MultiValueDict): + """A specialized MultiValueDict that takes a query string when initialized. + This is immutable unless you create a copy of it.""" + def __init__(self, query_string): + try: + from mod_python.util import parse_qsl + except ImportError: + from cgi import parse_qsl + if not query_string: + self.data = {} + self._keys = [] + else: + self.data = {} + self._keys = [] + for name, value in parse_qsl(query_string, True): # keep_blank_values=True + if name in self.data: + self.data[name].append(value) + else: + self.data[name] = [value] + if name not in self._keys: + self._keys.append(name) + self._mutable = False + + def __setitem__(self, key, value): + if not self._mutable: + raise AttributeError, "This QueryDict instance is immutable" + else: + self.data[key] = [value] + if not key in self._keys: + self._keys.append(key) + + def setlist(self, key, list_): + if not self._mutable: + raise AttributeError, "This QueryDict instance is immutable" + else: + self.data[key] = list_ + if not key in self._keys: + self._keys.append(key) + + def copy(self): + "Returns a mutable copy of this object" + cp = datastructures.MultiValueDict.copy(self) + cp._mutable = True + return cp + + def assert_synchronized(self): + assert(len(self._keys) == len(self.data.keys())), \ + "QueryDict data structure is out of sync: %s %s" % (str(self._keys), str(self.data)) + + def items(self): + "Respect order preserved by self._keys" + self.assert_synchronized() + items = [] + for key in self._keys: + if key in self.data: + items.append((key, self.data[key][0])) + return items + + def 
def parse_cookie(cookie):
    """
    Parses a raw Cookie header value into a plain dictionary that maps each
    cookie name to its value string.

    An empty or missing (None) header yields an empty dictionary.
    """
    # "not cookie" also covers None, which SimpleCookie.load() cannot parse.
    if not cookie:
        return {}
    c = SimpleCookie()
    c.load(cookie)
    cookiedict = {}
    for key, morsel in c.items():
        cookiedict[key] = morsel.value
    return cookiedict
def populate_apache_request(http_response, mod_python_req):
    """
    Populates the mod_python request object with an HttpResponse.

    Hands over, in this order: the Content-Type header (as the request's
    content_type, falling back to DEFAULT_MIME_TYPE when the header is
    missing or empty), one Set-Cookie header per cookie, every remaining
    response header, and the status code. Finally the content is fetched
    with get_content_as_string('utf-8') and written to the request.

    The http_response object itself is left unmodified.
    """
    try:
        content_type = http_response['Content-Type']
    except KeyError:
        # Header lookup raises for a missing key, so the fallback below
        # would otherwise never be reached.
        content_type = None
    mod_python_req.content_type = content_type or DEFAULT_MIME_TYPE
    # Every cookie needs a Set-Cookie header of its own; add() appends a
    # header instead of overwriting the previous one, whereas joining all
    # cookies into a single value would put line breaks inside one header.
    for morsel in http_response.cookies.values():
        mod_python_req.headers_out.add('Set-Cookie', morsel.output(header=''))
    for key, value in http_response.headers.items():
        # Content-Type was already passed on through content_type above.
        if key != 'Content-Type':
            mod_python_req.headers_out[key] = value
    mod_python_req.status = http_response.status_code
    mod_python_req.write(http_response.get_content_as_string('utf-8'))
def truncate_words(s, num):
    """
    Truncates a string after a certain number of words.

    num is converted with int(); values below zero are treated as zero.
    When words are cut off, a separate '...' word is appended unless the
    last remaining word already ends in '...'. Runs of whitespace in s are
    collapsed to single spaces, because the words are re-joined with ' '.
    """
    length = max(int(num), 0)
    words = s.split()
    if len(words) > length:
        words = words[:length]
        # With zero words kept there is no last word to inspect, so the
        # ellipsis is added unconditionally in that case.
        if not words or not words[-1].endswith('...'):
            words.append('...')
    return ' '.join(words)
def recapitalize(text, capwords=None):
    """
    Recapitalizes text, placing caps after end-of-sentence punctuation.

    The text is lowercased first. Then the first character of the text, and
    any character that directly follows '.', '?' or '!' plus one space, is
    uppercased if it is a letter a-z. Finally every word in capwords is
    restored to exactly the capitalization given, wherever it occurs as a
    whole word (matched case-insensitively).

    capwords is an optional sequence of words. When it is omitted, the
    built-in list 'I Jayhawk Jayhawks Lawrence Kansas KS' is used.
    """
    if capwords is None:
        capwords = 'I Jayhawk Jayhawks Lawrence Kansas KS'.split()
    text = text.lower()
    sentence_start_re = re.compile(r'(?:^|(?<=[\.\?\!] ))([a-z])')
    text = sentence_start_re.sub(lambda m: m.group(1).upper(), text)
    for capword in capwords:
        # Escape the word so regex metacharacters in it are matched
        # literally, and substitute through a function so backslashes in
        # the word are not read as replacement-template escapes.
        capword_re = re.compile(r'\b%s\b' % re.escape(capword), re.I)
        text = capword_re.sub(lambda m, word=capword: word, text)
    return text
class SimplerXMLGenerator(XMLGenerator):
    "An XMLGenerator with a shortcut for writing simple, childless elements."

    def addQuickElement(self, name, contents=None, attrs=None):
        """
        Convenience method for adding an element with no children.

        Emits the start tag for name with the given attrs, then contents as
        character data unless contents is None, then the end tag. attrs is
        an optional mapping of attribute names to values; omitting it
        writes the element without attributes.
        """
        # A fresh dictionary per call, so no default object is shared
        # between calls.
        if attrs is None:
            attrs = {}
        self.startElement(name, attrs)
        if contents is not None:
            self.characters(contents)
        self.endElement(name)
