diff options
| author | Malcolm Tredinnick <malcolm.tredinnick@gmail.com> | 2007-02-10 02:51:27 +0000 |
|---|---|---|
| committer | Malcolm Tredinnick <malcolm.tredinnick@gmail.com> | 2007-02-10 02:51:27 +0000 |
| commit | 5a0b72a6eb41a66af14d6256fa382380399eabfb (patch) | |
| tree | c9637ad788a6564346f5469dfc3f6cf732bedbdd /django/utils/text.py | |
| parent | 2a14daffb0571af26438a7c838ef897a1113cc7a (diff) | |
Fixed #2027 -- added truncatewords_html filter that respects HTML tags whilst
truncating. Patch from SmileyChris.
git-svn-id: http://code.djangoproject.com/svn/django/trunk@4468 bcc190cf-cafb-0310-a4f2-bffc1f526a37
Diffstat (limited to 'django/utils/text.py')
| -rw-r--r-- | django/utils/text.py | 60 |
1 files changed, 60 insertions, 0 deletions
diff --git a/django/utils/text.py b/django/utils/text.py index 217f42491b..1c1c456e2d 100644 --- a/django/utils/text.py +++ b/django/utils/text.py @@ -41,6 +41,66 @@ def truncate_words(s, num): words.append('...') return ' '.join(words) +def truncate_html_words(s, num): + """ + Truncates html to a certain number of words (not counting tags and comments). + Closes opened tags if they were correctly closed in the given html. + """ + length = int(num) + if length <= 0: + return '' + html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input') + # Set up regular expressions + re_words = re.compile(r'&.*?;|<.*?>|([A-Za-z0-9][\w-]*)') + re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>') + # Count non-HTML words and keep note of open tags + pos = 0 + ellipsis_pos = 0 + words = 0 + open_tags = [] + while words <= length: + m = re_words.search(s, pos) + if not m: + # Checked through whole string + break + pos = m.end(0) + if m.group(1): + # It's an actual non-HTML word + words += 1 + if words == length: + ellipsis_pos = pos + continue + # Check for tag + tag = re_tag.match(m.group(0)) + if not tag or ellipsis_pos: + # Don't worry about non tags or tags after our truncate point + continue + closing_tag, tagname, self_closing = tag.groups() + tagname = tagname.lower() # Element names are always case-insensitive + if self_closing or tagname in html4_singlets: + pass + elif closing_tag: + # Check for match in open tags list + try: + i = open_tags.index(tagname) + except ValueError: + pass + else: + # SGML: An end tag closes, back to the matching start tag, all unclosed intervening start tags with omitted end tags + open_tags = open_tags[i+1:] + else: + # Add it to the start of the open tags list + open_tags.insert(0, tagname) + if words <= length: + # Don't try to close tags if we don't need to truncate + return s + out = s[:ellipsis_pos] + ' ...' + # Close any tags still open + for tag in open_tags: + out += '</%s>' % tag + # Return string + return out + def get_valid_filename(s): """ Returns the given string converted to a string that can be used for a clean |
