diff options
| author | Christopher Long <indirecthit@gmail.com> | 2007-06-17 22:18:54 +0000 |
|---|---|---|
| committer | Christopher Long <indirecthit@gmail.com> | 2007-06-17 22:18:54 +0000 |
| commit | ae22b6d403dcf25098c77f0dfcf59ae58b186461 (patch) | |
| tree | c37fc631e99a7e4d909d6b6d236f495003731ea7 /django/utils/text.py | |
| parent | 0cf7bc439129c66df8d64601e885f83b256b4f25 (diff) | |
per-object-permissions: Merged to trunk [5486] NOTE: Not fully tested, will be working on this over the next few weeks.
git-svn-id: http://code.djangoproject.com/svn/django/branches/per-object-permissions@5488 bcc190cf-cafb-0310-a4f2-bffc1f526a37
Diffstat (limited to 'django/utils/text.py')
| -rw-r--r-- | django/utils/text.py | 67 |
1 files changed, 64 insertions, 3 deletions
diff --git a/django/utils/text.py b/django/utils/text.py index 217f42491b..c73ab908f3 100644 --- a/django/utils/text.py +++ b/django/utils/text.py @@ -17,7 +17,7 @@ def wrap(text, width): pos = len(word) - word.rfind('\n') - 1 for word in it: if "\n" in word: - lines = word.splitlines() + lines = word.split('\n') else: lines = (word,) pos += len(lines[0]) + 1 @@ -41,6 +41,66 @@ def truncate_words(s, num): words.append('...') return ' '.join(words) +def truncate_html_words(s, num): + """ + Truncates html to a certain number of words (not counting tags and comments). + Closes opened tags if they were correctly closed in the given html. + """ + length = int(num) + if length <= 0: + return '' + html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input') + # Set up regular expressions + re_words = re.compile(r'&.*?;|<.*?>|([A-Za-z0-9][\w-]*)') + re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>') + # Count non-HTML words and keep note of open tags + pos = 0 + ellipsis_pos = 0 + words = 0 + open_tags = [] + while words <= length: + m = re_words.search(s, pos) + if not m: + # Checked through whole string + break + pos = m.end(0) + if m.group(1): + # It's an actual non-HTML word + words += 1 + if words == length: + ellipsis_pos = pos + continue + # Check for tag + tag = re_tag.match(m.group(0)) + if not tag or ellipsis_pos: + # Don't worry about non tags or tags after our truncate point + continue + closing_tag, tagname, self_closing = tag.groups() + tagname = tagname.lower() # Element names are always case-insensitive + if self_closing or tagname in html4_singlets: + pass + elif closing_tag: + # Check for match in open tags list + try: + i = open_tags.index(tagname) + except ValueError: + pass + else: + # SGML: An end tag closes, back to the matching start tag, all unclosed intervening start tags with omitted end tags + open_tags = open_tags[i+1:] + else: + # Add it to the start of the open tags list + open_tags.insert(0, tagname) + if words <= length: + # Don't try to close tags if we don't need to truncate + return s + out = s[:ellipsis_pos] + ' ...' + # Close any tags still open + for tag in open_tags: + out += '</%s>' % tag + # Return string + return out + def get_valid_filename(s): """ Returns the given string converted to a string that can be used for a clean @@ -131,14 +191,15 @@ def smart_split(text): Supports both single and double quotes, and supports escaping quotes with backslashes. In the output, strings will keep their initial and trailing quote marks. + >>> list(smart_split('This is "a person\'s" test.')) ['This', 'is', '"a person\'s"', 'test.'] """ for bit in smart_split_re.finditer(text): bit = bit.group(0) - if bit[0] == '"': + if bit[0] == '"' and bit[-1] == '"': yield '"' + bit[1:-1].replace('\\"', '"').replace('\\\\', '\\') + '"' - elif bit[0] == "'": + elif bit[0] == "'" and bit[-1] == "'": yield "'" + bit[1:-1].replace("\\'", "'").replace("\\\\", "\\") + "'" else: yield bit |
